summaryrefslogtreecommitdiff
path: root/libc/benchmarks/gpu
diff options
context:
space:
mode:
authorMingming Liu <mingmingl@google.com>2025-09-10 15:25:31 -0700
committerGitHub <noreply@github.com>2025-09-10 15:25:31 -0700
commit1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree57f4b1f313c8cf74eed8819870f39c36ea263c68 /libc/benchmarks/gpu
parent898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parentb8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-formatusers/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'libc/benchmarks/gpu')
-rw-r--r--libc/benchmarks/gpu/CMakeLists.txt6
-rw-r--r--libc/benchmarks/gpu/LibcGpuBenchmark.h110
-rw-r--r--libc/benchmarks/gpu/Random.h190
-rw-r--r--libc/benchmarks/gpu/src/math/CMakeLists.txt147
-rw-r--r--libc/benchmarks/gpu/src/math/atan2_benchmark.cpp7
-rw-r--r--libc/benchmarks/gpu/src/math/exp_benchmark.cpp59
-rw-r--r--libc/benchmarks/gpu/src/math/expf16_benchmark.cpp56
-rw-r--r--libc/benchmarks/gpu/src/math/expf_benchmark.cpp59
-rw-r--r--libc/benchmarks/gpu/src/math/log_benchmark.cpp68
-rw-r--r--libc/benchmarks/gpu/src/math/logf16_benchmark.cpp62
-rw-r--r--libc/benchmarks/gpu/src/math/logf_benchmark.cpp68
-rw-r--r--libc/benchmarks/gpu/src/math/platform.h19
-rw-r--r--libc/benchmarks/gpu/src/math/sin_benchmark.cpp7
13 files changed, 744 insertions, 114 deletions
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index 6ca134b12a47..cf8c9902ca7f 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -40,6 +40,7 @@ add_unittest_framework_library(
LibcGpuBenchmarkMain.cpp
HDRS
LibcGpuBenchmark.h
+ Random.h
DEPENDS
libc.benchmarks.gpu.timing.timing
libc.hdr.stdint_proxy
@@ -49,12 +50,17 @@ add_unittest_framework_library(
libc.src.__support.CPP.algorithm
libc.src.__support.CPP.atomic
libc.src.__support.CPP.array
+ libc.src.__support.CPP.optional
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.FPUtil.sqrt
+ libc.src.__support.sign
libc.src.__support.fixedvector
libc.src.__support.GPU.utils
libc.src.__support.time.gpu.time_utils
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
libc.src.stdio.printf
libc.src.time.clock
)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
index e36e93c7efc1..b310d49a60fd 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.h
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -1,6 +1,8 @@
#ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
#define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
+#include "benchmarks/gpu/Random.h"
+
#include "benchmarks/gpu/timing/timing.h"
#include "hdr/stdint_proxy.h"
@@ -175,94 +177,6 @@ private:
}
};
-class RandomGenerator {
- uint64_t state;
-
- static LIBC_INLINE uint64_t splitmix64(uint64_t x) noexcept {
- x += 0x9E3779B97F4A7C15ULL;
- x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ULL;
- x = (x ^ (x >> 27)) * 0x94D049BB133111EBULL;
- x = (x ^ (x >> 31));
- return x ? x : 0x9E3779B97F4A7C15ULL;
- }
-
-public:
- explicit LIBC_INLINE RandomGenerator(uint64_t seed) noexcept
- : state(splitmix64(seed)) {}
-
- LIBC_INLINE uint64_t next64() noexcept {
- uint64_t x = state;
- x ^= x >> 12;
- x ^= x << 25;
- x ^= x >> 27;
- state = x;
- return x * 0x2545F4914F6CDD1DULL;
- }
-
- LIBC_INLINE uint32_t next32() noexcept {
- return static_cast<uint32_t>(next64() >> 32);
- }
-};
-
-// We want random floating-point values whose *unbiased* exponent e is
-// approximately uniform in [min_exp, max_exp]. That is,
-// 2^min_exp <= |value| < 2^(max_exp + 1).
-// Caveats / boundaries:
-// - e = -EXP_BIAS ==> subnormal range (biased exponent = 0). We ensure a
-// non-zero mantissa so we don't accidentally produce 0.
-// - e in [1 - EXP_BIAS, EXP_BIAS] ==> normal numbers.
-// - e = EXP_BIAS + 1 ==> Inf/NaN. We do not include it by default; max_exp
-// defaults to EXP_BIAS.
-template <typename T>
-static T
-get_rand_input(RandomGenerator &rng,
- int min_exp = -LIBC_NAMESPACE::fputil::FPBits<T>::EXP_BIAS,
- int max_exp = LIBC_NAMESPACE::fputil::FPBits<T>::EXP_BIAS) {
- using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
- using Storage = typename FPBits::StorageType;
-
- // Sanitize and clamp requested range to what the format supports
- if (min_exp > max_exp) {
- auto tmp = min_exp;
- min_exp = max_exp;
- max_exp = tmp;
- };
- min_exp = cpp::max(min_exp, -FPBits::EXP_BIAS);
- max_exp = cpp::min(max_exp, FPBits::EXP_BIAS);
-
- // Sample unbiased exponent e uniformly in [min_exp, max_exp] without modulo
- // bias
- auto sample_in_range = [&](uint64_t r) -> int32_t {
- const uint64_t range = static_cast<uint64_t>(
- static_cast<int64_t>(max_exp) - static_cast<int64_t>(min_exp) + 1);
- const uint64_t threshold = (-range) % range;
- while (r < threshold)
- r = rng.next64();
- return static_cast<int32_t>(min_exp + static_cast<int64_t>(r % range));
- };
- const int32_t e = sample_in_range(rng.next64());
-
- // Start from random bits to get random sign and mantissa
- FPBits xbits([&] {
- if constexpr (cpp::is_same_v<T, double>)
- return FPBits(rng.next64());
- else
- return FPBits(rng.next32());
- }());
-
- if (e == -FPBits::EXP_BIAS) {
- // Subnormal: biased exponent must be 0; ensure mantissa != 0 to avoid 0
- xbits.set_biased_exponent(Storage(0));
- if (xbits.get_mantissa() == Storage(0))
- xbits.set_mantissa(Storage(1));
- } else {
- // Normal: biased exponent in [1, 2 * FPBits::EXP_BIAS]
- const int32_t biased = e + FPBits::EXP_BIAS;
- xbits.set_biased_exponent(static_cast<Storage>(biased));
- }
- return xbits.get_val();
-}
-
template <typename T> class MathPerf {
static LIBC_INLINE uint64_t make_seed(uint64_t base_seed, uint64_t salt) {
const uint64_t tid = gpu::get_thread_id();
@@ -271,9 +185,9 @@ template <typename T> class MathPerf {
public:
// Returns cycles-per-call (lower is better)
- template <size_t N = 1>
- static uint64_t run_throughput_in_range(T f(T), int min_exp, int max_exp,
- uint32_t call_index) {
+ template <size_t N = 1, typename Dist>
+ static uint64_t run_throughput(T (*f)(T), const Dist &dist,
+ uint32_t call_index) {
cpp::array<T, N> inputs;
uint64_t base_seed = static_cast<uint64_t>(call_index);
@@ -281,7 +195,7 @@ public:
RandomGenerator rng(make_seed(base_seed, salt));
for (size_t i = 0; i < N; ++i)
- inputs[i] = get_rand_input<T>(rng, min_exp, max_exp);
+ inputs[i] = dist(rng);
uint64_t total_time = LIBC_NAMESPACE::throughput(f, inputs);
@@ -289,11 +203,9 @@ public:
}
// Returns cycles-per-call (lower is better)
- template <size_t N = 1>
- static uint64_t run_throughput_in_range(T f(T, T), int arg1_min_exp,
- int arg1_max_exp, int arg2_min_exp,
- int arg2_max_exp,
- uint32_t call_index) {
+ template <size_t N = 1, typename Dist1, typename Dist2>
+ static uint64_t run_throughput(T (*f)(T, T), const Dist1 &dist1,
+ const Dist2 &dist2, uint32_t call_index) {
cpp::array<T, N> inputs1;
cpp::array<T, N> inputs2;
@@ -302,8 +214,8 @@ public:
RandomGenerator rng(make_seed(base_seed, salt));
for (size_t i = 0; i < N; ++i) {
- inputs1[i] = get_rand_input<T>(rng, arg1_min_exp, arg1_max_exp);
- inputs2[i] = get_rand_input<T>(rng, arg2_min_exp, arg2_max_exp);
+ inputs1[i] = dist1(rng);
+ inputs2[i] = dist2(rng);
}
uint64_t total_time = LIBC_NAMESPACE::throughput(f, inputs1, inputs2);
diff --git a/libc/benchmarks/gpu/Random.h b/libc/benchmarks/gpu/Random.h
new file mode 100644
index 000000000000..f7d272289a6d
--- /dev/null
+++ b/libc/benchmarks/gpu/Random.h
@@ -0,0 +1,190 @@
+//===-- Pseudo-random number generation utilities ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_BENCHMARKS_GPU_RANDOM_H
+#define LLVM_LIBC_BENCHMARKS_GPU_RANDOM_H
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+#include "src/__support/sign.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace benchmarks {
+
+// Pseudo-random number generator (PRNG). next64() performs one xorshift* step;
+// next32() and next16() return the top 32 / top 16 bits of a next64() draw.
+// The seed is passed through one SplitMix64 mixing round before use. See:
+// https://en.wikipedia.org/wiki/Xorshift
+class RandomGenerator {
+ uint64_t state; // xorshift state; the constructor never stores 0 here
+
+ static LIBC_INLINE uint64_t splitmix64(uint64_t x) noexcept { // seed mixer
+ x += 0x9E3779B97F4A7C15ULL;
+ x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ULL;
+ x = (x ^ (x >> 27)) * 0x94D049BB133111EBULL;
+ x = (x ^ (x >> 31));
+ return x ? x : 0x9E3779B97F4A7C15ULL; // a zero state would make next64() return 0 forever
+ }
+
+public:
+ explicit LIBC_INLINE RandomGenerator(uint64_t seed) noexcept // any seed is accepted, including 0
+ : state(splitmix64(seed)) {}
+
+ LIBC_INLINE uint64_t next64() noexcept { // advances the state by one step
+ uint64_t x = state;
+ x ^= x >> 12;
+ x ^= x << 25;
+ x ^= x >> 27;
+ state = x; // the stored state is the value before the output multiply
+ return x * 0x2545F4914F6CDD1DULL; // output scrambling multiply (wraps mod 2^64)
+ }
+
+ LIBC_INLINE uint32_t next32() noexcept { // consumes one full next64() draw
+ return static_cast<uint32_t>(next64() >> 32);
+ }
+
+ LIBC_INLINE uint16_t next16() noexcept { // consumes one full next64() draw
+ return static_cast<uint16_t>(next64() >> 48);
+ }
+};
+
+// Generates random floating-point numbers whose exponent `e` is sampled
+// uniformly in `[min_exp, max_exp]` (reversed bounds are swapped, then clamped
+// to +/-EXP_BIAS). `e == -EXP_BIAS` selects subnormals; any larger `e` selects
+// a normal binade. Sign is random unless forced. Never yields Inf/NaN or zero.
+template <typename T> class UniformExponent {
+ static_assert(cpp::is_same_v<T, float16> || cpp::is_same_v<T, float> ||
+ cpp::is_same_v<T, double>,
+ "UniformExponent supports float16, float, and double");
+
+ using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
+ using Storage = typename FPBits::StorageType;
+
+public:
+ explicit UniformExponent(int min_exp = -FPBits::EXP_BIAS, // default lower bound: the subnormal bucket
+ int max_exp = FPBits::EXP_BIAS, // default upper bound: the top normal binade
+ cpp::optional<Sign> forced_sign = cpp::nullopt) // nullopt keeps the random sign bit
+ : min_exp(clamp_exponent(cpp::min(min_exp, max_exp))),
+ max_exp(clamp_exponent(cpp::max(min_exp, max_exp))),
+ forced_sign(forced_sign) {}
+
+ LIBC_INLINE T operator()(RandomGenerator &rng) const noexcept { // draws one value; advances rng
+ // Pick the exponent e uniformly in [min_exp, max_exp]. Rejection sampling
+ // discards the low draws that would otherwise cause modulo bias.
+ auto sample_in_range = [&](uint64_t r) -> int32_t {
+ const uint64_t range = static_cast<uint64_t>(
+ static_cast<int64_t>(max_exp) - static_cast<int64_t>(min_exp) + 1);
+ const uint64_t threshold = (-range) % range; // equals 2^64 mod range in unsigned arithmetic
+ while (r < threshold) // redraw until r falls in the evenly divisible part
+ r = rng.next64();
+ return static_cast<int32_t>(min_exp + static_cast<int64_t>(r % range));
+ };
+ const int32_t e = sample_in_range(rng.next64()); // the exponent draw comes before the bits draw
+
+ // Seed sign, exponent and mantissa fields with random bits of T's width
+ FPBits xbits([&] {
+ if constexpr (cpp::is_same_v<T, double>)
+ return FPBits(rng.next64());
+ else if constexpr (cpp::is_same_v<T, float>)
+ return FPBits(rng.next32());
+ else
+ return FPBits(rng.next16()); // float16 is the only type left by the static_assert
+ }());
+
+ if (e == -FPBits::EXP_BIAS) {
+ // Subnormal bucket: biased exponent 0; force mantissa != 0 so it is not 0
+ xbits.set_biased_exponent(Storage(0));
+ if (xbits.get_mantissa() == Storage(0))
+ xbits.set_mantissa(Storage(1));
+ } else {
+ // Normal: e in [1 - EXP_BIAS, EXP_BIAS] gives biased in [1, 2 * EXP_BIAS]
+ const int32_t biased = e + FPBits::EXP_BIAS;
+ xbits.set_biased_exponent(static_cast<Storage>(biased));
+ }
+
+ if (forced_sign) // overwrite the random sign bit only when a sign was requested
+ xbits.set_sign(*forced_sign);
+
+ return xbits.get_val();
+ }
+
+private:
+ static LIBC_INLINE int clamp_exponent(int val) noexcept { // saturates val to [-EXP_BIAS, EXP_BIAS]
+ if (val < -FPBits::EXP_BIAS)
+ return -FPBits::EXP_BIAS;
+
+ if (val > FPBits::EXP_BIAS)
+ return FPBits::EXP_BIAS;
+
+ return val;
+ }
+
+ const int min_exp; // inclusive lower exponent bound, already ordered and clamped
+ const int max_exp; // inclusive upper exponent bound, already ordered and clamped
+ const cpp::optional<Sign> forced_sign; // empty means "leave the sign random"
+};
+
+// Generates random floating-point numbers that are uniformly distributed on
+// a linear scale. A uniform variate `u` in [0, 1) is mapped onto
+// `[min_val, max_val)` in double precision and the result is then converted
+// to `T`. That final conversion rounds, so for a narrower `T` a sample can land
+// exactly on `max_val`. Reversed bounds are swapped and both bounds are clamped
+// to the finite range of `T`.
+template <typename T> class UniformLinear {
+  static_assert(cpp::is_same_v<T, float16> || cpp::is_same_v<T, float> ||
+                    cpp::is_same_v<T, double>,
+                "UniformLinear supports float16, float, and double");
+
+  using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
+  using Storage = typename FPBits::StorageType;
+
+  static constexpr T MAX_NORMAL = FPBits::max_normal().get_val();
+
+public:
+  // By default the distribution spans every finite value of `T`.
+  explicit UniformLinear(T min_val = -MAX_NORMAL, T max_val = MAX_NORMAL)
+      : min_val(clamp_val(cpp::min(min_val, max_val))),
+        max_val(clamp_val(cpp::max(min_val, max_val))) {}
+
+  // Draws one sample, consuming a single 64-bit value from `rng`.
+  LIBC_INLINE T operator()(RandomGenerator &rng) const noexcept {
+    constexpr double DOUBLE_MAX =
+        LIBC_NAMESPACE::fputil::FPBits<double>::max_normal().get_val();
+
+    const double u = standard_uniform(rng.next64());
+    const double a = static_cast<double>(min_val);
+    const double b = static_cast<double>(max_val);
+    const double span = b - a;
+
+    // `b - a` exceeds the double range when T is double and the bounds are
+    // far apart (the default range is one such case). The plain formula
+    // would then return only Inf/NaN, so interpolate on halved bounds, which
+    // cannot overflow, and scale back up.
+    if (span > DOUBLE_MAX) {
+      const double half_a = 0.5 * a;
+      const double half_b = 0.5 * b;
+      return static_cast<T>(2.0 * (half_a + (half_b - half_a) * u));
+    }
+
+    // Common case: same expression as before, so existing inputs are
+    // reproduced bit-for-bit.
+    return static_cast<T>(a + span * u);
+  }
+
+private:
+  // Saturates `val` to [-MAX_NORMAL, MAX_NORMAL] so infinities become finite.
+  static LIBC_INLINE T clamp_val(T val) noexcept {
+    if (val < -MAX_NORMAL)
+      return -MAX_NORMAL;
+
+    if (val > MAX_NORMAL)
+      return MAX_NORMAL;
+
+    return val;
+  }
+
+  // Converts 64 random bits into a double in [0, 1): keeps the top
+  // PREC_BITS bits of `x` (53 for IEEE-754 binary64) and scales them by
+  // 2^-PREC_BITS, so every result is an exact multiple of that step.
+  static LIBC_INLINE double standard_uniform(uint64_t x) noexcept {
+    constexpr int PREC_BITS =
+        LIBC_NAMESPACE::fputil::FPBits<double>::SIG_LEN + 1;
+    constexpr int SHIFT_BITS = LIBC_NAMESPACE::fputil::FPBits<double>::EXP_LEN;
+    constexpr double INV = 1.0 / static_cast<double>(1ULL << PREC_BITS);
+
+    return static_cast<double>(x >> SHIFT_BITS) * INV;
+  }
+
+  const T min_val; // inclusive lower bound, ordered and clamped
+  const T max_val; // upper bound, ordered and clamped
+};
+
+} // namespace benchmarks
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif
diff --git a/libc/benchmarks/gpu/src/math/CMakeLists.txt b/libc/benchmarks/gpu/src/math/CMakeLists.txt
index 8417f23c124a..53da45d9eb2b 100644
--- a/libc/benchmarks/gpu/src/math/CMakeLists.txt
+++ b/libc/benchmarks/gpu/src/math/CMakeLists.txt
@@ -25,15 +25,19 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
endif()
add_benchmark(
- sin_benchmark
+ atan2_benchmark
SUITE
libc-gpu-math-benchmarks
SRCS
- sin_benchmark.cpp
+ atan2_benchmark.cpp
+ HDRS
+ platform.h
DEPENDS
libc.hdr.stdint_proxy
- libc.src.math.sin
- libc.src.math.sinf
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.math.atan2
COMPILE_OPTIONS
${math_benchmark_flags}
LOADER_ARGS
@@ -41,14 +45,143 @@ add_benchmark(
)
add_benchmark(
- atan2_benchmark
+ exp_benchmark
SUITE
libc-gpu-math-benchmarks
SRCS
- atan2_benchmark.cpp
+ exp_benchmark.cpp
+ HDRS
+ platform.h
DEPENDS
libc.hdr.stdint_proxy
- libc.src.math.atan2
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.math.exp
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ expf_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ expf_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.math.expf
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ expf16_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ expf16_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.math.expf16
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ log_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ log_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.__support.sign
+ libc.src.math.log
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ logf_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ logf_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.__support.sign
+ libc.src.math.logf
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ logf16_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ logf16_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.__support.sign
+ libc.src.math.logf16
+ COMPILE_OPTIONS
+ ${math_benchmark_flags}
+ LOADER_ARGS
+ --threads 64
+)
+
+add_benchmark(
+ sin_benchmark
+ SUITE
+ libc-gpu-math-benchmarks
+ SRCS
+ sin_benchmark.cpp
+ HDRS
+ platform.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+ libc.src.math.sin
+ libc.src.math.sinf
COMPILE_OPTIONS
${math_benchmark_flags}
LOADER_ARGS
diff --git a/libc/benchmarks/gpu/src/math/atan2_benchmark.cpp b/libc/benchmarks/gpu/src/math/atan2_benchmark.cpp
index 82bb0c5d7de4..6039f0c66b2a 100644
--- a/libc/benchmarks/gpu/src/math/atan2_benchmark.cpp
+++ b/libc/benchmarks/gpu/src/math/atan2_benchmark.cpp
@@ -9,8 +9,11 @@
#define BM_RANDOM_INPUTS(T, Func, MinExp, MaxExp, N) \
[](uint32_t call_index) { \
- return LIBC_NAMESPACE::benchmarks::MathPerf<T>::run_throughput_in_range< \
- N>(Func, MinExp, MaxExp, MinExp, MaxExp, call_index); \
+ using namespace LIBC_NAMESPACE::benchmarks; \
+ \
+ const UniformExponent<T> dist(MinExp, MaxExp); \
+ return MathPerf<T>::template run_throughput<N>(Func, dist, dist, \
+ call_index); \
}
#define BENCH(T, Name, Func, MinExp, MaxExp) \
diff --git a/libc/benchmarks/gpu/src/math/exp_benchmark.cpp b/libc/benchmarks/gpu/src/math/exp_benchmark.cpp
new file mode 100644
index 000000000000..2398c4b9f17b
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/exp_benchmark.cpp
@@ -0,0 +1,59 @@
+//===-- GPU benchmark for exp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/math/exp.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// RANDOM_INPUT(T, Func, Dist, Min, Max, N) expands to a captureless lambda
+// taking `call_index`. It builds `Dist<T>(Min, Max)` and returns
+// MathPerf<T>::run_throughput<N>(Func, dist, call_index).
+#define RANDOM_INPUT(T, Func, Dist, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const Dist<T> dist(Min, Max); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// BENCH(...) instantiates SINGLE_WAVE_BENCHMARK four times, as Name_1,
+// Name_128, Name_1024 and Name_4096, where the suffix is the input count N.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpGpuBenchmark, Name##_1, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpGpuBenchmark, Name##_128, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 128)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpGpuBenchmark, Name##_1024, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1024)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpGpuBenchmark, Name##_4096, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 4096))
+
+using LIBC_NAMESPACE::exp;
+
+// UniformExponent emits subnormals only for the exponent -EXP_BIAS, which is
+// -1023 for double; -1022 would select the lowest *normal* binade instead.
+BENCH(double, ExpSubnormal, exp, UniformExponent, -1023, -1023);
+BENCH(double, ExpCoreRange, exp, UniformLinear, -10.0, 10.0);
+BENCH(double, ExpFinite, exp, UniformLinear, -745.0, 709.0);
+BENCH(double, ExpUnderflow, exp, UniformLinear, -746.0, -745.0);
+BENCH(double, ExpOverflow, exp, UniformLinear, 709.0, 710.0);
+
+#ifdef NVPTX_MATH_FOUND
+BENCH(double, NvExpSubnormal, __nv_exp, UniformExponent, -1023, -1023);
+BENCH(double, NvExpCoreRange, __nv_exp, UniformLinear, -10.0, 10.0);
+BENCH(double, NvExpFinite, __nv_exp, UniformLinear, -745.0, 709.0);
+BENCH(double, NvExpUnderflow, __nv_exp, UniformLinear, -746.0, -745.0);
+BENCH(double, NvExpOverflow, __nv_exp, UniformLinear, 709.0, 710.0);
+#endif
+
+#ifdef AMDGPU_MATH_FOUND
+BENCH(double, AmdExpSubnormal, __ocml_exp_f64, UniformExponent, -1023, -1023);
+BENCH(double, AmdExpCoreRange, __ocml_exp_f64, UniformLinear, -10.0, 10.0);
+BENCH(double, AmdExpFinite, __ocml_exp_f64, UniformLinear, -745.0, 709.0);
+BENCH(double, AmdExpUnderflow, __ocml_exp_f64, UniformLinear, -746.0, -745.0);
+BENCH(double, AmdExpOverflow, __ocml_exp_f64, UniformLinear, 709.0, 710.0);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/expf16_benchmark.cpp b/libc/benchmarks/gpu/src/math/expf16_benchmark.cpp
new file mode 100644
index 000000000000..20e045b893ec
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/expf16_benchmark.cpp
@@ -0,0 +1,56 @@
+//===-- GPU benchmark for expf16 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/macros/properties/types.h"
+#include "src/math/expf16.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// RANDOM_INPUT(T, Func, Dist, Min, Max, N) expands to a captureless lambda
+// taking `call_index`. It builds `Dist<T>(Min, Max)` and returns
+// MathPerf<T>::run_throughput<N>(Func, dist, call_index).
+#define RANDOM_INPUT(T, Func, Dist, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const Dist<T> dist(Min, Max); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// BENCH(...) instantiates SINGLE_WAVE_BENCHMARK four times, as Name_1,
+// Name_128, Name_1024 and Name_4096, where the suffix is the input count N.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpf16GpuBenchmark, Name##_1, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpf16GpuBenchmark, Name##_128, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 128)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpf16GpuBenchmark, Name##_1024, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1024)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpf16GpuBenchmark, Name##_4096, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 4096))
+
+using LIBC_NAMESPACE::expf16;
+
+// UniformExponent emits subnormals only for the exponent -EXP_BIAS, which is
+// -15 for float16; -14 would select the lowest *normal* binade instead.
+BENCH(float16, Expf16Subnormal, expf16, UniformExponent, -15, -15);
+BENCH(float16, Expf16CoreRange, expf16, UniformLinear, -10.0f16, 10.0f16);
+BENCH(float16, Expf16Finite, expf16, UniformLinear, -16.0f16, 11.0f16);
+BENCH(float16, Expf16Underflow, expf16, UniformLinear, -17.0f16, -16.0f16);
+BENCH(float16, Expf16Overflow, expf16, UniformLinear, 11.0f16, 12.0f16);
+
+#ifdef AMDGPU_MATH_FOUND
+BENCH(float16, AmdExpf16Subnormal, __ocml_exp_f16, UniformExponent, -15, -15);
+BENCH(float16, AmdExpf16CoreRange, __ocml_exp_f16, UniformLinear, -10.0f16,
+      10.0f16);
+BENCH(float16, AmdExpf16Finite, __ocml_exp_f16, UniformLinear, -16.0f16,
+      11.0f16);
+BENCH(float16, AmdExpf16Underflow, __ocml_exp_f16, UniformLinear, -17.0f16,
+      -16.0f16);
+BENCH(float16, AmdExpf16Overflow, __ocml_exp_f16, UniformLinear, 11.0f16,
+      12.0f16);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/expf_benchmark.cpp b/libc/benchmarks/gpu/src/math/expf_benchmark.cpp
new file mode 100644
index 000000000000..4ef54c53baf4
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/expf_benchmark.cpp
@@ -0,0 +1,59 @@
+//===-- GPU benchmark for expf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/math/expf.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// RANDOM_INPUT(T, Func, Dist, Min, Max, N) expands to a captureless lambda
+// taking `call_index`. It builds `Dist<T>(Min, Max)` and returns
+// MathPerf<T>::run_throughput<N>(Func, dist, call_index).
+#define RANDOM_INPUT(T, Func, Dist, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const Dist<T> dist(Min, Max); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// BENCH(...) instantiates SINGLE_WAVE_BENCHMARK four times, as Name_1,
+// Name_128, Name_1024 and Name_4096, where the suffix is the input count N.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpfGpuBenchmark, Name##_1, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpfGpuBenchmark, Name##_128, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 128)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpfGpuBenchmark, Name##_1024, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 1024)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcExpfGpuBenchmark, Name##_4096, \
+                        RANDOM_INPUT(T, Func, Dist, Min, Max, 4096))
+
+using LIBC_NAMESPACE::expf;
+
+// UniformExponent emits subnormals only for the exponent -EXP_BIAS, which is
+// -127 for float; -126 would select the lowest *normal* binade instead.
+BENCH(float, ExpfSubnormal, expf, UniformExponent, -127, -127);
+BENCH(float, ExpfCoreRange, expf, UniformLinear, -10.0f, 10.0f);
+BENCH(float, ExpfFinite, expf, UniformLinear, -103.0f, 88.0f);
+BENCH(float, ExpfUnderflow, expf, UniformLinear, -104.0f, -103.0f);
+BENCH(float, ExpfOverflow, expf, UniformLinear, 88.0f, 89.0f);
+
+#ifdef NVPTX_MATH_FOUND
+BENCH(float, NvExpfSubnormal, __nv_expf, UniformExponent, -127, -127);
+BENCH(float, NvExpfCoreRange, __nv_expf, UniformLinear, -10.0f, 10.0f);
+BENCH(float, NvExpfFinite, __nv_expf, UniformLinear, -103.0f, 88.0f);
+BENCH(float, NvExpfUnderflow, __nv_expf, UniformLinear, -104.0f, -103.0f);
+BENCH(float, NvExpfOverflow, __nv_expf, UniformLinear, 88.0f, 89.0f);
+#endif
+
+#ifdef AMDGPU_MATH_FOUND
+BENCH(float, AmdExpfSubnormal, __ocml_exp_f32, UniformExponent, -127, -127);
+BENCH(float, AmdExpfCoreRange, __ocml_exp_f32, UniformLinear, -10.0f, 10.0f);
+BENCH(float, AmdExpfFinite, __ocml_exp_f32, UniformLinear, -103.0f, 88.0f);
+BENCH(float, AmdExpfUnderflow, __ocml_exp_f32, UniformLinear, -104.0f, -103.0f);
+BENCH(float, AmdExpfOverflow, __ocml_exp_f32, UniformLinear, 88.0f, 89.0f);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/log_benchmark.cpp b/libc/benchmarks/gpu/src/math/log_benchmark.cpp
new file mode 100644
index 000000000000..0ea1906ff053
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/log_benchmark.cpp
@@ -0,0 +1,68 @@
+//===-- GPU benchmark for log ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/sign.h"
+#include "src/math/log.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// RANDOM_INPUT_UniformExponent(...) expands to a captureless lambda that
+// builds a positive-only `UniformExponent<T>(Min, Max, Sign::POS)` and returns
+// MathPerf<T>::run_throughput<N>(Func, dist, call_index).
+#define RANDOM_INPUT_UniformExponent(T, Func, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const UniformExponent<T> dist(Min, Max, LIBC_NAMESPACE::Sign::POS); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// RANDOM_INPUT_UniformLinear(...) is the same wrapper for
+// `UniformLinear<T>(Min, Max)`; the sign follows from the bounds.
+#define RANDOM_INPUT_UniformLinear(T, Func, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const UniformLinear<T> dist(Min, Max); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// BENCH(...) instantiates SINGLE_WAVE_BENCHMARK four times, as Name_1,
+// Name_128, Name_1024 and Name_4096 (suffix = input count N). `Dist` is pasted
+// onto RANDOM_INPUT_ to select one of the two wrappers.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogGpuBenchmark, Name##_1, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 1)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogGpuBenchmark, Name##_128, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 128)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogGpuBenchmark, Name##_1024, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 1024)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogGpuBenchmark, Name##_4096, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 4096))
+
+using LIBC_NAMESPACE::log;
+
+static constexpr double INV_E = 0x1.78b56362cef38p-2; // exp(-1.0)
+static constexpr double E = 0x1.5bf0a8b145769p+1;     // exp(+1.0)
+
+// UniformExponent emits subnormals only for the exponent -EXP_BIAS, which is
+// -1023 for double. Normal binades run from -1022 up to 1023.
+BENCH(double, LogSubnormal, log, UniformExponent, -1023, -1023);
+BENCH(double, LogAroundOne, log, UniformLinear, INV_E, E);
+BENCH(double, LogMedMag, log, UniformExponent, -10, 10);
+BENCH(double, LogNormal, log, UniformExponent, -1022, 1023);
+
+#ifdef NVPTX_MATH_FOUND
+BENCH(double, NvLogSubnormal, __nv_log, UniformExponent, -1023, -1023);
+BENCH(double, NvLogAroundOne, __nv_log, UniformLinear, INV_E, E);
+BENCH(double, NvLogMedMag, __nv_log, UniformExponent, -10, 10);
+BENCH(double, NvLogNormal, __nv_log, UniformExponent, -1022, 1023);
+#endif
+
+#ifdef AMDGPU_MATH_FOUND
+BENCH(double, AmdLogSubnormal, __ocml_log_f64, UniformExponent, -1023, -1023);
+BENCH(double, AmdLogAroundOne, __ocml_log_f64, UniformLinear, INV_E, E);
+BENCH(double, AmdLogMedMag, __ocml_log_f64, UniformExponent, -10, 10);
+BENCH(double, AmdLogNormal, __ocml_log_f64, UniformExponent, -1022, 1023);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/logf16_benchmark.cpp b/libc/benchmarks/gpu/src/math/logf16_benchmark.cpp
new file mode 100644
index 000000000000..9748e15c4640
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/logf16_benchmark.cpp
@@ -0,0 +1,62 @@
+//===-- GPU benchmark for logf16 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/macros/properties/types.h"
+#include "src/__support/sign.h"
+#include "src/math/logf16.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// RANDOM_INPUT_UniformExponent(...) expands to a captureless lambda that
+// builds a positive-only `UniformExponent<T>(Min, Max, Sign::POS)` and returns
+// MathPerf<T>::run_throughput<N>(Func, dist, call_index).
+#define RANDOM_INPUT_UniformExponent(T, Func, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const UniformExponent<T> dist(Min, Max, LIBC_NAMESPACE::Sign::POS); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// RANDOM_INPUT_UniformLinear(...) is the same wrapper for
+// `UniformLinear<T>(Min, Max)`; the sign follows from the bounds.
+#define RANDOM_INPUT_UniformLinear(T, Func, Min, Max, N) \
+  [](uint32_t call_index) { \
+    using namespace LIBC_NAMESPACE::benchmarks; \
+    \
+    const UniformLinear<T> dist(Min, Max); \
+    return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
+  }
+
+// BENCH(...) instantiates SINGLE_WAVE_BENCHMARK four times, as Name_1,
+// Name_128, Name_1024 and Name_4096 (suffix = input count N). `Dist` is pasted
+// onto RANDOM_INPUT_ to select one of the two wrappers.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogf16GpuBenchmark, Name##_1, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 1)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogf16GpuBenchmark, Name##_128, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 128)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogf16GpuBenchmark, Name##_1024, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 1024)); \
+  SINGLE_WAVE_BENCHMARK(LlvmLibcLogf16GpuBenchmark, Name##_4096, \
+                        RANDOM_INPUT_##Dist(T, Func, Min, Max, 4096))
+
+using LIBC_NAMESPACE::logf16;
+
+static constexpr float16 INV_E = 0x1.78b56362cef38p-2f16; // exp(-1.0)
+static constexpr float16 E = 0x1.5bf0a8b145769p+1f16;     // exp(+1.0)
+
+// UniformExponent emits subnormals only for the exponent -EXP_BIAS, which is
+// -15 for float16. Normal binades run from -14 up to 15.
+BENCH(float16, Logf16Subnormal, logf16, UniformExponent, -15, -15);
+BENCH(float16, Logf16AroundOne, logf16, UniformLinear, INV_E, E);
+BENCH(float16, Logf16MedMag, logf16, UniformExponent, -10, 10);
+BENCH(float16, Logf16Normal, logf16, UniformExponent, -14, 15);
+
+#ifdef AMDGPU_MATH_FOUND
+BENCH(float16, AmdLogf16Subnormal, __ocml_log_f16, UniformExponent, -15, -15);
+BENCH(float16, AmdLogf16AroundOne, __ocml_log_f16, UniformLinear, INV_E, E);
+BENCH(float16, AmdLogf16MedMag, __ocml_log_f16, UniformExponent, -10, 10);
+BENCH(float16, AmdLogf16Normal, __ocml_log_f16, UniformExponent, -14, 15);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/logf_benchmark.cpp b/libc/benchmarks/gpu/src/math/logf_benchmark.cpp
new file mode 100644
index 000000000000..c4e5a226a18f
--- /dev/null
+++ b/libc/benchmarks/gpu/src/math/logf_benchmark.cpp
@@ -0,0 +1,68 @@
+//===-- GPU benchmark for logf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/sign.h"
+#include "src/math/logf.h"
+
+#if defined(NVPTX_MATH_FOUND) || defined(AMDGPU_MATH_FOUND)
+#include "platform.h"
+#endif
+
+// Expands to a capture-less lambda that takes the benchmark call index. The
+// lambda builds a UniformExponent<T> from (Min, Max, Sign::POS) and forwards
+// it, together with Func, to MathPerf<T>::run_throughput<N>, returning that
+// call's result.
+#define RANDOM_INPUT_UniformExponent(T, Func, Min, Max, N)                     \
+  [](uint32_t call_index) {                                                    \
+    namespace bench = LIBC_NAMESPACE::benchmarks;                              \
+    using Perf = bench::MathPerf<T>;                                           \
+    const bench::UniformExponent<T> inputs(Min, Max,                           \
+                                           LIBC_NAMESPACE::Sign::POS);         \
+    return Perf::template run_throughput<N>(Func, inputs, call_index);         \
+  }
+
+// Expands to a capture-less lambda that takes the benchmark call index. The
+// lambda builds a UniformLinear<T> from (Min, Max) and forwards it, together
+// with Func, to MathPerf<T>::run_throughput<N>, returning that call's result.
+#define RANDOM_INPUT_UniformLinear(T, Func, Min, Max, N)                       \
+  [](uint32_t call_index) {                                                    \
+    namespace bench = LIBC_NAMESPACE::benchmarks;                              \
+    using Perf = bench::MathPerf<T>;                                           \
+    const bench::UniformLinear<T> inputs(Min, Max);                            \
+    return Perf::template run_throughput<N>(Func, inputs, call_index);         \
+  }
+
+// Registers four SINGLE_WAVE_BENCHMARK entries in the LlvmLibcLogfGpuBenchmark
+// suite, one for each N in {1, 128, 1024, 4096} (N is the run_throughput<N>
+// template argument used by the input macros). Name is token-pasted with the
+// N suffix (Name_1, Name_128, ...), and Dist is token-pasted onto
+// RANDOM_INPUT_ to select the macro that builds the lambda.
+// The expansion ends without a semicolon; the use site supplies it.
+#define BENCH(T, Name, Func, Dist, Min, Max) \
+ SINGLE_WAVE_BENCHMARK(LlvmLibcLogfGpuBenchmark, Name##_1, \
+ RANDOM_INPUT_##Dist(T, Func, Min, Max, 1)); \
+ SINGLE_WAVE_BENCHMARK(LlvmLibcLogfGpuBenchmark, Name##_128, \
+ RANDOM_INPUT_##Dist(T, Func, Min, Max, 128)); \
+ SINGLE_WAVE_BENCHMARK(LlvmLibcLogfGpuBenchmark, Name##_1024, \
+ RANDOM_INPUT_##Dist(T, Func, Min, Max, 1024)); \
+ SINGLE_WAVE_BENCHMARK(LlvmLibcLogfGpuBenchmark, Name##_4096, \
+ RANDOM_INPUT_##Dist(T, Func, Min, Max, 4096))
+
+using LIBC_NAMESPACE::logf;
+
+// Bounds of the "AroundOne" range: 1/e and e, spelled as hex-float literals.
+static constexpr float INV_E = 0x1.78b56362cef38p-2f; // exp(-1.0)
+static constexpr float E = 0x1.5bf0a8b145769p+1f; // exp(+1.0)
+
+// LIBC_NAMESPACE::logf over four input ranges; each line registers the
+// _1/_128/_1024/_4096 variants through BENCH.
+// NOTE(review): -126 is binary32's minimum normal exponent; the "Subnormal"
+// name suggests UniformExponent maps that value to the subnormal range -
+// confirm against Random.h.
+BENCH(float, LogfSubnormal, logf, UniformExponent, -126, -126);
+BENCH(float, LogfAroundOne, logf, UniformLinear, INV_E, E);
+BENCH(float, LogfMedMag, logf, UniformExponent, -10, 10);
+BENCH(float, LogfNormal, logf, UniformExponent, -125, 127);
+
+// The same four input ranges, run against the NVIDIA vendor function
+// __nv_logf. Only compiled when NVPTX_MATH_FOUND is defined.
+#ifdef NVPTX_MATH_FOUND
+BENCH(float, NvLogfSubnormal, __nv_logf, UniformExponent, -126, -126);
+BENCH(float, NvLogfAroundOne, __nv_logf, UniformLinear, INV_E, E);
+BENCH(float, NvLogfMedMag, __nv_logf, UniformExponent, -10, 10);
+BENCH(float, NvLogfNormal, __nv_logf, UniformExponent, -125, 127);
+#endif
+
+// The same four input ranges, run against the AMD vendor function
+// __ocml_log_f32. Only compiled when AMDGPU_MATH_FOUND is defined.
+#ifdef AMDGPU_MATH_FOUND
+BENCH(float, AmdLogfSubnormal, __ocml_log_f32, UniformExponent, -126, -126);
+BENCH(float, AmdLogfAroundOne, __ocml_log_f32, UniformLinear, INV_E, E);
+BENCH(float, AmdLogfMedMag, __ocml_log_f32, UniformExponent, -10, 10);
+BENCH(float, AmdLogfNormal, __ocml_log_f32, UniformExponent, -125, 127);
+#endif
diff --git a/libc/benchmarks/gpu/src/math/platform.h b/libc/benchmarks/gpu/src/math/platform.h
index 2dfa9f2299d4..e675d1e7b0d1 100644
--- a/libc/benchmarks/gpu/src/math/platform.h
+++ b/libc/benchmarks/gpu/src/math/platform.h
@@ -11,6 +11,7 @@
#include "hdr/stdint_proxy.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
namespace LIBC_NAMESPACE_DECL {
@@ -41,17 +42,27 @@ extern const LIBC_INLINE_VAR uint32_t __oclc_ISA_version = 9000;
// Forward declarations for the vendor math libraries.
extern "C" {
#ifdef AMDGPU_MATH_FOUND
+// AMD entry points, declared only when AMDGPU_MATH_FOUND is defined. The
+// suffix names the operand type (f64 = double, f32 = float, f16 = float16);
+// float16 variants are declared for exp and log only.
-double __ocml_sin_f64(double);
-float __ocml_sin_f32(float);
double __ocml_atan2_f64(double, double);
float __ocml_atan2_f32(float, float);
+double __ocml_exp_f64(double);
+float __ocml_exp_f32(float);
+float16 __ocml_exp_f16(float16);
+double __ocml_log_f64(double);
+float __ocml_log_f32(float);
+float16 __ocml_log_f16(float16);
+double __ocml_sin_f64(double);
+float __ocml_sin_f32(float);
#endif
#ifdef NVPTX_MATH_FOUND
+// NVIDIA entry points, declared only when NVPTX_MATH_FOUND is defined. The
+// unsuffixed name takes double and the 'f'-suffixed name takes float; no
+// float16 variants are declared here.
-double __nv_sin(double);
-float __nv_sinf(float);
double __nv_atan2(double, double);
float __nv_atan2f(float, float);
+double __nv_exp(double);
+float __nv_expf(float);
+double __nv_log(double);
+float __nv_logf(float);
+double __nv_sin(double);
+float __nv_sinf(float);
#endif
}
diff --git a/libc/benchmarks/gpu/src/math/sin_benchmark.cpp b/libc/benchmarks/gpu/src/math/sin_benchmark.cpp
index 5fe95c3f3b26..5ed82c845dec 100644
--- a/libc/benchmarks/gpu/src/math/sin_benchmark.cpp
+++ b/libc/benchmarks/gpu/src/math/sin_benchmark.cpp
@@ -1,4 +1,5 @@
#include "benchmarks/gpu/LibcGpuBenchmark.h"
+#include "benchmarks/gpu/Random.h"
#include "hdr/stdint_proxy.h"
#include "src/math/sin.h"
@@ -10,8 +11,10 @@
+// Expands to a capture-less lambda that takes the benchmark call index. With
+// this change the lambda builds a UniformExponent<T> from (MinExp, MaxExp) and
+// passes it with Func to MathPerf<T>::run_throughput<N>, replacing the earlier
+// direct call to MathPerf<T>::run_throughput_in_range<N>.
#define BM_RANDOM_INPUT(T, Func, MinExp, MaxExp, N) \
[](uint32_t call_index) { \
- return LIBC_NAMESPACE::benchmarks::MathPerf<T>::run_throughput_in_range< \
- N>(Func, MinExp, MaxExp, call_index); \
+ using namespace LIBC_NAMESPACE::benchmarks; \
+ \
+ const UniformExponent<T> dist(MinExp, MaxExp); \
+ return MathPerf<T>::template run_throughput<N>(Func, dist, call_index); \
}
#define BENCH(T, Name, Func, MinExp, MaxExp) \