libc/src/__support/FPUtil/bfloat16.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

//===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H

#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/comparison_operations.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/generic/add_sub.h"
#include "src/__support/FPUtil/generic/div.h"
#include "src/__support/FPUtil/generic/mul.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/types.h"

namespace LIBC_NAMESPACE_DECL {
namespace fputil {

/// Software representation of the bfloat16 data type.
///
/// The value is stored as a raw 16-bit pattern in `bits`. Conversions,
/// comparisons and arithmetic are delegated to the generic helpers in
/// `fputil`.
struct BFloat16 {
  /// Raw 16-bit encoding of the value.
  uint16_t bits;

  /// Defaulted constructor; `bits` is not explicitly initialized here.
  LIBC_INLINE BFloat16() = default;

  /// Explicit converting constructor.
  ///
  /// Dispatch on `T`:
  ///  - floating-point types go through `fputil::cast<bfloat16>`;
  ///  - integral types are split into a sign and an unsigned magnitude, which
  ///    are fed to a `DyadicFloat` and converted with
  ///    `as<bfloat16, /*ShouldSignalExceptions=*/true>()`;
  ///  - types convertible to `BFloat16` use their own conversion operator;
  ///  - anything else is first converted to `float` with `static_cast`.
  template <typename T>
  LIBC_INLINE constexpr explicit BFloat16(T value)
      : bits(static_cast<uint16_t>(0U)) {
    if constexpr (cpp::is_floating_point_v<T>) {
      bits = fputil::cast<bfloat16>(value).bits;
    } else if constexpr (cpp::is_integral_v<T>) {
      using UnsignedT = cpp::make_unsigned_t<T>;

      Sign sign = Sign::POS;
      UnsignedT magnitude = static_cast<UnsignedT>(value);

      if constexpr (cpp::is_signed_v<T>) {
        if (value < 0) {
          sign = Sign::NEG;
          // Negate in the unsigned domain. Writing `-value` on the signed
          // type overflows (undefined behavior) for the most negative value
          // of T, whereas unsigned subtraction wraps and yields the exact
          // magnitude for every negative input.
          magnitude =
              static_cast<UnsignedT>(static_cast<UnsignedT>(0) - magnitude);
        }
      }

      fputil::DyadicFloat<cpp::numeric_limits<UnsignedT>::digits> xd(
          sign, 0, magnitude);
      bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;

    } else if constexpr (cpp::is_convertible_v<T, BFloat16>) {
      bits = value.operator BFloat16().bits;
    } else {
      bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
    }
  }

  /// Implicit conversion to `float`.
  ///
  /// Only available when `float` is IEEE 754 binary32: the 16 stored bits are
  /// placed in the upper half of a 32-bit word (lower half zero) and the word
  /// is reinterpreted as a `float`.
  template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
                                 fputil::FPType::IEEE754_Binary32,
                             int> = 0>
  LIBC_INLINE constexpr operator float() const {
    uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
    return cpp::bit_cast<float>(x_bits);
  }

  /// Explicit conversion to an integral type, performed by converting to
  /// `float` first and then applying `static_cast<T>`.
  template <typename T, cpp::enable_if_t<cpp::is_integral_v<T>, int> = 0>
  LIBC_INLINE constexpr explicit operator T() const {
    return static_cast<T>(static_cast<float>(*this));
  }

  // Comparison operators; each forwards to the matching helper from
  // comparison_operations.h.

  /// Returns the result of `fputil::equals(*this, other)`.
  LIBC_INLINE bool operator==(BFloat16 other) const {
    return fputil::equals(*this, other);
  }

  /// Returns the negation of `fputil::equals(*this, other)`.
  LIBC_INLINE bool operator!=(BFloat16 other) const {
    return !fputil::equals(*this, other);
  }

  /// Returns the result of `fputil::less_than(*this, other)`.
  LIBC_INLINE bool operator<(BFloat16 other) const {
    return fputil::less_than(*this, other);
  }

  /// Returns the result of `fputil::less_than_or_equals(*this, other)`.
  LIBC_INLINE bool operator<=(BFloat16 other) const {
    return fputil::less_than_or_equals(*this, other);
  }

  /// Returns the result of `fputil::greater_than(*this, other)`.
  LIBC_INLINE bool operator>(BFloat16 other) const {
    return fputil::greater_than(*this, other);
  }

  /// Returns the result of `fputil::greater_than_or_equals(*this, other)`.
  LIBC_INLINE bool operator>=(BFloat16 other) const {
    return fputil::greater_than_or_equals(*this, other);
  }

  /// Unary minus: returns a copy whose sign is flipped via `FPBits`
  /// (NEG if the value is currently positive, POS otherwise).
  LIBC_INLINE constexpr BFloat16 operator-() const {
    fputil::FPBits<bfloat16> result(*this);
    result.set_sign(result.is_pos() ? Sign::NEG : Sign::POS);
    return result.get_val();
  }

  // Binary arithmetic operators; each forwards to the corresponding generic
  // software implementation in fputil::generic.

  /// Returns `fputil::generic::add` applied to `*this` and `other`.
  LIBC_INLINE BFloat16 operator+(BFloat16 other) const {
    return fputil::generic::add<BFloat16>(*this, other);
  }

  /// Returns `fputil::generic::sub` applied to `*this` and `other`.
  LIBC_INLINE BFloat16 operator-(BFloat16 other) const {
    return fputil::generic::sub<BFloat16>(*this, other);
  }

  /// Returns `fputil::generic::mul` applied to `*this` and `other`.
  LIBC_INLINE BFloat16 operator*(BFloat16 other) const {
    return fputil::generic::mul<bfloat16>(*this, other);
  }

  /// Returns `fputil::generic::div` applied to `*this` and `other`.
  LIBC_INLINE BFloat16 operator/(BFloat16 other) const {
    return fputil::generic::div<bfloat16>(*this, other);
  }
}; // struct BFloat16

} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H