1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
//===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/comparison_operations.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/generic/add_sub.h"
#include "src/__support/FPUtil/generic/div.h"
#include "src/__support/FPUtil/generic/mul.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/types.h"
namespace LIBC_NAMESPACE_DECL {
namespace fputil {
/// Software bfloat16 value type, stored as its raw 16-bit encoding.
///
/// Conversions, comparisons and arithmetic are all delegated to the generic
/// helpers in `fputil`; this struct only holds the bits and wires up the
/// operators.
struct BFloat16 {
  /// Raw 16-bit encoding of the value.
  uint16_t bits;

  /// Defaulted constructor; `bits` has no default member initializer.
  LIBC_INLINE BFloat16() = default;

  /// Explicitly converts `value` to bfloat16.
  ///
  /// The conversion path is selected at compile time from `T`:
  ///  - floating-point types go through `fputil::cast<bfloat16>`;
  ///  - integral types are split into sign and magnitude, loaded into a
  ///    `DyadicFloat` with exponent 0, and converted with
  ///    `as<bfloat16, /*ShouldSignalExceptions=*/true>()`;
  ///  - types convertible to `BFloat16` use their own conversion operator;
  ///  - anything else is first `static_cast` to `float`, then cast.
  template <typename T>
  LIBC_INLINE constexpr explicit BFloat16(T value)
      : bits(static_cast<uint16_t>(0U)) {
    if constexpr (cpp::is_floating_point_v<T>) {
      bits = fputil::cast<bfloat16>(value).bits;
    } else if constexpr (cpp::is_integral_v<T>) {
      using UnsignedT = cpp::make_unsigned_t<T>;

      Sign sign = Sign::POS;
      // Converting to the unsigned counterpart is modular, so this is
      // well-defined for every input, including negative ones.
      UnsignedT magnitude = static_cast<UnsignedT>(value);

      if constexpr (cpp::is_signed_v<T>) {
        if (value < 0) {
          sign = Sign::NEG;
          // Negate in the unsigned domain: `-value` on the signed type
          // overflows (undefined behaviour) for the most negative value of
          // T, whereas `0 - magnitude` wraps to the correct absolute value.
          magnitude =
              static_cast<UnsignedT>(static_cast<UnsignedT>(0) - magnitude);
        }
      }

      fputil::DyadicFloat<cpp::numeric_limits<UnsignedT>::digits> xd(
          sign, 0, magnitude);
      bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;
    } else if constexpr (cpp::is_convertible_v<T, BFloat16>) {
      bits = value.operator BFloat16().bits;
    } else {
      bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
    }
  }

  /// Implicit conversion to `float`, constrained on `float` being IEEE 754
  /// binary32. The 16 stored bits become the upper half of a 32-bit word
  /// (lower half zero), which is then reinterpreted as a `float`.
  template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
                                 fputil::FPType::IEEE754_Binary32,
                             int> = 0>
  LIBC_INLINE constexpr operator float() const {
    uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
    return cpp::bit_cast<float>(x_bits);
  }

  /// Explicit conversion to an integral type `T`, performed by converting to
  /// `float` first and then applying `static_cast<T>`.
  template <typename T, cpp::enable_if_t<cpp::is_integral_v<T>, int> = 0>
  LIBC_INLINE constexpr explicit operator T() const {
    return static_cast<T>(static_cast<float>(*this));
  }

  // Comparison operators: each forwards to the matching helper declared in
  // comparison_operations.h.

  /// Returns `fputil::equals(*this, other)`.
  LIBC_INLINE bool operator==(BFloat16 other) const {
    return fputil::equals(*this, other);
  }

  /// Returns the negation of `fputil::equals(*this, other)`.
  LIBC_INLINE bool operator!=(BFloat16 other) const {
    return !fputil::equals(*this, other);
  }

  /// Returns `fputil::less_than(*this, other)`.
  LIBC_INLINE bool operator<(BFloat16 other) const {
    return fputil::less_than(*this, other);
  }

  /// Returns `fputil::less_than_or_equals(*this, other)`.
  LIBC_INLINE bool operator<=(BFloat16 other) const {
    return fputil::less_than_or_equals(*this, other);
  }

  /// Returns `fputil::greater_than(*this, other)`.
  LIBC_INLINE bool operator>(BFloat16 other) const {
    return fputil::greater_than(*this, other);
  }

  /// Returns `fputil::greater_than_or_equals(*this, other)`.
  LIBC_INLINE bool operator>=(BFloat16 other) const {
    return fputil::greater_than_or_equals(*this, other);
  }

  /// Unary minus: returns a copy of this value with the sign flipped.
  LIBC_INLINE constexpr BFloat16 operator-() const {
    fputil::FPBits<bfloat16> result(*this);
    result.set_sign(result.is_pos() ? Sign::NEG : Sign::POS);
    return result.get_val();
  }

  // Binary arithmetic operators: each forwards to the corresponding generic
  // software routine in fputil::generic.

  /// Returns `*this + other` via `fputil::generic::add`.
  LIBC_INLINE BFloat16 operator+(BFloat16 other) const {
    return fputil::generic::add<BFloat16>(*this, other);
  }

  /// Returns `*this - other` via `fputil::generic::sub`.
  LIBC_INLINE BFloat16 operator-(BFloat16 other) const {
    return fputil::generic::sub<BFloat16>(*this, other);
  }

  /// Returns `*this * other` via `fputil::generic::mul`.
  LIBC_INLINE BFloat16 operator*(BFloat16 other) const {
    return fputil::generic::mul<bfloat16>(*this, other);
  }

  /// Returns `*this / other` via `fputil::generic::div`.
  LIBC_INLINE BFloat16 operator/(BFloat16 other) const {
    return fputil::generic::div<bfloat16>(*this, other);
  }
}; // struct BFloat16
} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
|