summaryrefslogtreecommitdiff
path: root/libc/src/__support
diff options
context:
space:
mode:
authorNAKAMURA Takumi <geek4civic@gmail.com>2025-01-09 18:49:54 +0900
committerNAKAMURA Takumi <geek4civic@gmail.com>2025-01-09 18:49:54 +0900
commite2810c9a248f4c7fbfae84bb32b6f7e01027458b (patch)
treeae0b02a8491b969a1cee94ea16ffe42c559143c5 /libc/src/__support
parentfa04eb4af95c1ca7377279728cb004bcd2324d01 (diff)
parentbdcf47e4bcb92889665825654bb80a8bbe30379e (diff)
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/switchusers/chapuni/cov/single/switch
Diffstat (limited to 'libc/src/__support')
-rw-r--r--libc/src/__support/CMakeLists.txt2
-rw-r--r--libc/src/__support/File/file.cpp68
-rw-r--r--libc/src/__support/File/file.h4
-rw-r--r--libc/src/__support/GPU/CMakeLists.txt10
-rw-r--r--libc/src/__support/GPU/amdgpu/CMakeLists.txt7
-rw-r--r--libc/src/__support/GPU/amdgpu/utils.h183
-rw-r--r--libc/src/__support/GPU/generic/CMakeLists.txt7
-rw-r--r--libc/src/__support/GPU/generic/utils.h84
-rw-r--r--libc/src/__support/GPU/nvptx/CMakeLists.txt7
-rw-r--r--libc/src/__support/GPU/nvptx/utils.h160
-rw-r--r--libc/src/__support/GPU/utils.h108
-rw-r--r--libc/src/__support/fixedvector.h54
-rw-r--r--libc/src/__support/threads/thread.cpp4
13 files changed, 190 insertions, 508 deletions
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 4e90aad9a45b..5090dc218cda 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -267,7 +267,9 @@ add_header_library(
HDRS
fixedvector.h
DEPENDS
+ .libc_assert
libc.src.__support.CPP.array
+ libc.src.string.memory_utils.inline_memset
)
add_header_library(
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 972249fef96b..528542cccf32 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -42,7 +42,7 @@ FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) {
if (pos > 0) { // If the buffer is not empty
// Flush the buffer
const size_t write_size = pos;
- auto write_result = platform_write(this, buf, write_size);
+ FileIOResult write_result = platform_write(this, buf, write_size);
pos = 0; // Buffer is now empty so reset pos to the beginning.
// If less bytes were written than expected, then an error occurred.
if (write_result < write_size) {
@@ -52,7 +52,7 @@ FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) {
}
}
- auto write_result = platform_write(this, data, len);
+ FileIOResult write_result = platform_write(this, data, len);
if (write_result < len)
err = true;
return write_result;
@@ -99,7 +99,7 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) {
// is full.
const size_t write_size = pos;
- auto buf_result = platform_write(this, buf, write_size);
+ FileIOResult buf_result = platform_write(this, buf, write_size);
size_t bytes_written = buf_result.value;
pos = 0; // Buffer is now empty so reset pos to the beginning.
@@ -121,7 +121,8 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) {
pos = remainder.size();
} else {
- auto result = platform_write(this, remainder.data(), remainder.size());
+ FileIOResult result =
+ platform_write(this, remainder.data(), remainder.size());
size_t bytes_written = buf_result.value;
// If less bytes were written than expected, then an error occurred. Return
@@ -190,6 +191,17 @@ FileIOResult File::read_unlocked(void *data, size_t len) {
prev_op = FileOp::READ;
+ if (bufmode == _IONBF) { // unbuffered.
+ return read_unlocked_nbf(static_cast<uint8_t *>(data), len);
+ } else if (bufmode == _IOFBF) { // fully buffered
+ return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
+ } else /*if (bufmode == _IOLBF) */ { // line buffered
+ // There is no line buffered mode for read. Use fully buffered instead.
+ return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
+ }
+}
+
+size_t File::copy_data_from_buf(uint8_t *data, size_t len) {
cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data), len);
@@ -209,32 +221,42 @@ FileIOResult File::read_unlocked(void *data, size_t len) {
for (size_t i = 0; i < available_data; ++i)
dataref[i] = bufref[i + pos];
read_limit = pos = 0; // Reset the pointers.
+
+ return available_data;
+}
+
+FileIOResult File::read_unlocked_fbf(uint8_t *data, size_t len) {
+ // Read data from the buffer first.
+ size_t available_data = copy_data_from_buf(data, len);
+ if (available_data == len)
+ return available_data;
+
// Update the dataref to reflect that fact that we have already
// copied |available_data| into |data|.
- dataref = cpp::span<uint8_t>(dataref.data() + available_data,
- dataref.size() - available_data);
-
size_t to_fetch = len - available_data;
+ cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data,
+ to_fetch);
+
if (to_fetch > bufsize) {
- auto result = platform_read(this, dataref.data(), to_fetch);
+ FileIOResult result = platform_read(this, dataref.data(), to_fetch);
size_t fetched_size = result.value;
if (result.has_error() || fetched_size < to_fetch) {
if (!result.has_error())
eof = true;
else
err = true;
- return {available_data + fetched_size, result.has_error()};
+ return {available_data + fetched_size, result.error};
}
return len;
}
// Fetch and buffer another buffer worth of data.
- auto result = platform_read(this, buf, bufsize);
+ FileIOResult result = platform_read(this, buf, bufsize);
size_t fetched_size = result.value;
read_limit += fetched_size;
size_t transfer_size = fetched_size >= to_fetch ? to_fetch : fetched_size;
for (size_t i = 0; i < transfer_size; ++i)
- dataref[i] = bufref[i];
+ dataref[i] = buf[i];
pos += transfer_size;
if (result.has_error() || fetched_size < to_fetch) {
if (!result.has_error())
@@ -245,6 +267,26 @@ FileIOResult File::read_unlocked(void *data, size_t len) {
return {transfer_size + available_data, result.error};
}
+FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) {
+ // Check whether there is a character in the ungetc buffer.
+ size_t available_data = copy_data_from_buf(data, len);
+ if (available_data == len)
+ return available_data;
+
+ // Directly copy the data into |data|.
+ cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data,
+ len - available_data);
+ FileIOResult result = platform_read(this, dataref.data(), dataref.size());
+
+ if (result.has_error() || result < dataref.size()) {
+ if (!result.has_error())
+ eof = true;
+ else
+ err = true;
+ }
+ return {result + available_data, result.error};
+}
+
int File::ungetc_unlocked(int c) {
// There is no meaning to unget if:
// 1. You are trying to push back EOF.
@@ -287,7 +329,7 @@ ErrorOr<int> File::seek(off_t offset, int whence) {
FileLock lock(this);
if (prev_op == FileOp::WRITE && pos > 0) {
- auto buf_result = platform_write(this, buf, pos);
+ FileIOResult buf_result = platform_write(this, buf, pos);
if (buf_result.has_error() || buf_result.value < pos) {
err = true;
return Error(buf_result.error);
@@ -325,7 +367,7 @@ ErrorOr<off_t> File::tell() {
int File::flush_unlocked() {
if (prev_op == FileOp::WRITE && pos > 0) {
- auto buf_result = platform_write(this, buf, pos);
+ FileIOResult buf_result = platform_write(this, buf, pos);
if (buf_result.has_error() || buf_result.value < pos) {
err = true;
return buf_result.error;
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 42e1d11b4ab1..5c97a9c6419f 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -280,6 +280,10 @@ private:
FileIOResult write_unlocked_fbf(const uint8_t *data, size_t len);
FileIOResult write_unlocked_nbf(const uint8_t *data, size_t len);
+ FileIOResult read_unlocked_fbf(uint8_t *data, size_t len);
+ FileIOResult read_unlocked_nbf(uint8_t *data, size_t len);
+ size_t copy_data_from_buf(uint8_t *data, size_t len);
+
constexpr void adjust_buf() {
if (read_allowed() && (buf == nullptr || bufsize == 0)) {
// We should allow atleast one ungetc operation.
diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt
index 28fd9a1ebcc9..9b359f65cdb3 100644
--- a/libc/src/__support/GPU/CMakeLists.txt
+++ b/libc/src/__support/GPU/CMakeLists.txt
@@ -1,16 +1,12 @@
-if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
+# These utilities are GPU only.
+if(NOT LIBC_TARGET_OS_IS_GPU)
return()
endif()
-add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
-set(target_gpu_utils libc.src.__support.GPU.${LIBC_TARGET_ARCHITECTURE}.${LIBC_TARGET_ARCHITECTURE}_utils)
-
add_header_library(
utils
HDRS
utils.h
- DEPENDS
- ${target_gpu_utils}
)
add_object_library(
@@ -21,6 +17,6 @@ add_object_library(
allocator.h
DEPENDS
libc.src.__support.common
- libc.src.__support.GPU.utils
libc.src.__support.RPC.rpc_client
+ .utils
)
diff --git a/libc/src/__support/GPU/amdgpu/CMakeLists.txt b/libc/src/__support/GPU/amdgpu/CMakeLists.txt
deleted file mode 100644
index f2b98fc03b21..000000000000
--- a/libc/src/__support/GPU/amdgpu/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_header_library(
- amdgpu_utils
- HDRS
- utils.h
- DEPENDS
- libc.src.__support.common
-)
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
deleted file mode 100644
index 6ab95403ca38..000000000000
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ /dev/null
@@ -1,183 +0,0 @@
-//===-------------- AMDGPU implementation of GPU utils ----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_AMDGPU_IO_H
-#define LLVM_LIBC_SRC___SUPPORT_GPU_AMDGPU_IO_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-
-#include <stdint.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace gpu {
-
-/// Type aliases to the address spaces used by the AMDGPU backend.
-template <typename T> using Private = [[clang::opencl_private]] T;
-template <typename T> using Constant = [[clang::opencl_constant]] T;
-template <typename T> using Local = [[clang::opencl_local]] T;
-template <typename T> using Global = [[clang::opencl_global]] T;
-
-/// Returns the number of workgroups in the 'x' dimension of the grid.
-LIBC_INLINE uint32_t get_num_blocks_x() {
- return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
-}
-
-/// Returns the number of workgroups in the 'y' dimension of the grid.
-LIBC_INLINE uint32_t get_num_blocks_y() {
- return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();
-}
-
-/// Returns the number of workgroups in the 'z' dimension of the grid.
-LIBC_INLINE uint32_t get_num_blocks_z() {
- return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
-}
-
-/// Returns the total number of workgruops in the grid.
-LIBC_INLINE uint64_t get_num_blocks() {
- return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
-}
-
-/// Returns the 'x' dimension of the current AMD workgroup's id.
-LIBC_INLINE uint32_t get_block_id_x() {
- return __builtin_amdgcn_workgroup_id_x();
-}
-
-/// Returns the 'y' dimension of the current AMD workgroup's id.
-LIBC_INLINE uint32_t get_block_id_y() {
- return __builtin_amdgcn_workgroup_id_y();
-}
-
-/// Returns the 'z' dimension of the current AMD workgroup's id.
-LIBC_INLINE uint32_t get_block_id_z() {
- return __builtin_amdgcn_workgroup_id_z();
-}
-
-/// Returns the absolute id of the AMD workgroup.
-LIBC_INLINE uint64_t get_block_id() {
- return get_block_id_x() + get_num_blocks_x() * get_block_id_y() +
- get_num_blocks_x() * get_num_blocks_y() * get_block_id_z();
-}
-
-/// Returns the number of workitems in the 'x' dimension.
-LIBC_INLINE uint32_t get_num_threads_x() {
- return __builtin_amdgcn_workgroup_size_x();
-}
-
-/// Returns the number of workitems in the 'y' dimension.
-LIBC_INLINE uint32_t get_num_threads_y() {
- return __builtin_amdgcn_workgroup_size_y();
-}
-
-/// Returns the number of workitems in the 'z' dimension.
-LIBC_INLINE uint32_t get_num_threads_z() {
- return __builtin_amdgcn_workgroup_size_z();
-}
-
-/// Returns the total number of workitems in the workgroup.
-LIBC_INLINE uint64_t get_num_threads() {
- return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
-}
-
-/// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
-LIBC_INLINE uint32_t get_thread_id_x() {
- return __builtin_amdgcn_workitem_id_x();
-}
-
-/// Returns the 'y' dimension id of the workitem in the current AMD workgroup.
-LIBC_INLINE uint32_t get_thread_id_y() {
- return __builtin_amdgcn_workitem_id_y();
-}
-
-/// Returns the 'z' dimension id of the workitem in the current AMD workgroup.
-LIBC_INLINE uint32_t get_thread_id_z() {
- return __builtin_amdgcn_workitem_id_z();
-}
-
-/// Returns the absolute id of the thread in the current AMD workgroup.
-LIBC_INLINE uint64_t get_thread_id() {
- return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +
- get_num_threads_x() * get_num_threads_y() * get_thread_id_z();
-}
-
-/// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware
-/// and compilation options.
-LIBC_INLINE uint32_t get_lane_size() {
- return __builtin_amdgcn_wavefrontsize();
-}
-
-/// Returns the id of the thread inside of an AMD wavefront executing together.
-[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-/// Returns the bit-mask of active threads in the current wavefront.
-[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
- return __builtin_amdgcn_read_exec();
-}
-
-/// Copies the value from the first active thread in the wavefront to the rest.
-[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint64_t,
- uint32_t x) {
- return __builtin_amdgcn_readfirstlane(x);
-}
-
-/// Returns a bitmask of threads in the current lane for which \p x is true.
-[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
- // the lane_mask & gives the nvptx semantics when lane_mask is a subset of
- // the active threads
- return lane_mask & __builtin_amdgcn_ballot_w64(x);
-}
-
-/// Waits for all the threads in the block to converge and issues a fence.
-[[clang::convergent]] LIBC_INLINE void sync_threads() {
- __builtin_amdgcn_s_barrier();
- __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
-}
-
-/// Waits for all pending memory operations to complete in program order.
-[[clang::convergent]] LIBC_INLINE void memory_fence() {
- __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
-}
-
-/// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU.
-[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {
- __builtin_amdgcn_wave_barrier();
-}
-
-/// Shuffles the the lanes inside the wavefront according to the given index.
-[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t idx,
- uint32_t x) {
- return __builtin_amdgcn_ds_bpermute(idx << 2, x);
-}
-
-/// Returns the current value of the GPU's processor clock.
-/// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter.
-LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
-
-/// Returns a fixed-frequency timestamp. The actual frequency is dependent on
-/// the card and can only be queried via the driver.
-LIBC_INLINE uint64_t fixed_frequency_clock() {
- return __builtin_readsteadycounter();
-}
-
-/// Terminates execution of the associated wavefront.
-[[noreturn]] LIBC_INLINE void end_program() { __builtin_amdgcn_endpgm(); }
-
-/// Returns a unique identifier for the process cluster the current wavefront is
-/// executing on. Here we use the identifier for the compute unit (CU) and
-/// shader engine.
-/// FIXME: Currently unimplemented on AMDGPU until we have a simpler interface
-/// than the one at
-/// https://github.com/ROCm/clr/blob/develop/hipamd/include/hip/amd_detail/amd_device_functions.h#L899
-LIBC_INLINE uint32_t get_cluster_id() { return 0; }
-
-} // namespace gpu
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif
diff --git a/libc/src/__support/GPU/generic/CMakeLists.txt b/libc/src/__support/GPU/generic/CMakeLists.txt
deleted file mode 100644
index 68ba7d1ec80e..000000000000
--- a/libc/src/__support/GPU/generic/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_header_library(
- generic_utils
- HDRS
- utils.h
- DEPENDS
- libc.src.__support.common
-)
diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h
deleted file mode 100644
index 9461ef0aa245..000000000000
--- a/libc/src/__support/GPU/generic/utils.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-------------- Generic implementation of GPU utils ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H
-#define LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-
-#include <stdint.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace gpu {
-
-template <typename T> using Private = T;
-template <typename T> using Constant = T;
-template <typename T> using Shared = T;
-template <typename T> using Global = T;
-
-LIBC_INLINE uint32_t get_num_blocks_x() { return 1; }
-
-LIBC_INLINE uint32_t get_num_blocks_y() { return 1; }
-
-LIBC_INLINE uint32_t get_num_blocks_z() { return 1; }
-
-LIBC_INLINE uint64_t get_num_blocks() { return 1; }
-
-LIBC_INLINE uint32_t get_block_id_x() { return 0; }
-
-LIBC_INLINE uint32_t get_block_id_y() { return 0; }
-
-LIBC_INLINE uint32_t get_block_id_z() { return 0; }
-
-LIBC_INLINE uint64_t get_block_id() { return 0; }
-
-LIBC_INLINE uint32_t get_num_threads_x() { return 1; }
-
-LIBC_INLINE uint32_t get_num_threads_y() { return 1; }
-
-LIBC_INLINE uint32_t get_num_threads_z() { return 1; }
-
-LIBC_INLINE uint64_t get_num_threads() { return 1; }
-
-LIBC_INLINE uint32_t get_thread_id_x() { return 0; }
-
-LIBC_INLINE uint32_t get_thread_id_y() { return 0; }
-
-LIBC_INLINE uint32_t get_thread_id_z() { return 0; }
-
-LIBC_INLINE uint64_t get_thread_id() { return 0; }
-
-LIBC_INLINE uint32_t get_lane_size() { return 1; }
-
-LIBC_INLINE uint32_t get_lane_id() { return 0; }
-
-LIBC_INLINE uint64_t get_lane_mask() { return 1; }
-
-LIBC_INLINE uint32_t broadcast_value(uint64_t, uint32_t x) { return x; }
-
-LIBC_INLINE uint64_t ballot(uint64_t, bool x) { return x; }
-
-LIBC_INLINE void sync_threads() {}
-
-LIBC_INLINE void sync_lane(uint64_t) {}
-
-LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t, uint32_t x) { return x; }
-
-LIBC_INLINE uint64_t processor_clock() { return 0; }
-
-LIBC_INLINE uint64_t fixed_frequency_clock() { return 0; }
-
-[[noreturn]] LIBC_INLINE void end_program() { __builtin_unreachable(); }
-
-LIBC_INLINE uint32_t get_cluster_id() { return 0; }
-
-} // namespace gpu
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H
diff --git a/libc/src/__support/GPU/nvptx/CMakeLists.txt b/libc/src/__support/GPU/nvptx/CMakeLists.txt
deleted file mode 100644
index 0d3f8c7933c8..000000000000
--- a/libc/src/__support/GPU/nvptx/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_header_library(
- nvptx_utils
- HDRS
- utils.h
- DEPENDS
- libc.src.__support.common
-)
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
deleted file mode 100644
index 1a43a839a9ce..000000000000
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ /dev/null
@@ -1,160 +0,0 @@
-//===-------------- NVPTX implementation of GPU utils -----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-id: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_NVPTX_IO_H
-#define LLVM_LIBC_SRC___SUPPORT_GPU_NVPTX_IO_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-
-#include <stdint.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace gpu {
-
-/// Type aliases to the address spaces used by the NVPTX backend.
-template <typename T> using Private = [[clang::opencl_private]] T;
-template <typename T> using Constant = [[clang::opencl_constant]] T;
-template <typename T> using Local = [[clang::opencl_local]] T;
-template <typename T> using Global = [[clang::opencl_global]] T;
-
-/// Returns the number of CUDA blocks in the 'x' dimension.
-LIBC_INLINE uint32_t get_num_blocks_x() {
- return __nvvm_read_ptx_sreg_nctaid_x();
-}
-
-/// Returns the number of CUDA blocks in the 'y' dimension.
-LIBC_INLINE uint32_t get_num_blocks_y() {
- return __nvvm_read_ptx_sreg_nctaid_y();
-}
-
-/// Returns the number of CUDA blocks in the 'z' dimension.
-LIBC_INLINE uint32_t get_num_blocks_z() {
- return __nvvm_read_ptx_sreg_nctaid_z();
-}
-
-/// Returns the total number of CUDA blocks.
-LIBC_INLINE uint64_t get_num_blocks() {
- return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
-}
-
-/// Returns the 'x' dimension of the current CUDA block's id.
-LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); }
-
-/// Returns the 'y' dimension of the current CUDA block's id.
-LIBC_INLINE uint32_t get_block_id_y() { return __nvvm_read_ptx_sreg_ctaid_y(); }
-
-/// Returns the 'z' dimension of the current CUDA block's id.
-LIBC_INLINE uint32_t get_block_id_z() { return __nvvm_read_ptx_sreg_ctaid_z(); }
-
-/// Returns the absolute id of the CUDA block.
-LIBC_INLINE uint64_t get_block_id() {
- return get_block_id_x() + get_num_blocks_x() * get_block_id_y() +
- get_num_blocks_x() * get_num_blocks_y() * get_block_id_z();
-}
-
-/// Returns the number of CUDA threads in the 'x' dimension.
-LIBC_INLINE uint32_t get_num_threads_x() {
- return __nvvm_read_ptx_sreg_ntid_x();
-}
-
-/// Returns the number of CUDA threads in the 'y' dimension.
-LIBC_INLINE uint32_t get_num_threads_y() {
- return __nvvm_read_ptx_sreg_ntid_y();
-}
-
-/// Returns the number of CUDA threads in the 'z' dimension.
-LIBC_INLINE uint32_t get_num_threads_z() {
- return __nvvm_read_ptx_sreg_ntid_z();
-}
-
-/// Returns the total number of threads in the block.
-LIBC_INLINE uint64_t get_num_threads() {
- return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
-}
-
-/// Returns the 'x' dimension id of the thread in the current CUDA block.
-LIBC_INLINE uint32_t get_thread_id_x() { return __nvvm_read_ptx_sreg_tid_x(); }
-
-/// Returns the 'y' dimension id of the thread in the current CUDA block.
-LIBC_INLINE uint32_t get_thread_id_y() { return __nvvm_read_ptx_sreg_tid_y(); }
-
-/// Returns the 'z' dimension id of the thread in the current CUDA block.
-LIBC_INLINE uint32_t get_thread_id_z() { return __nvvm_read_ptx_sreg_tid_z(); }
-
-/// Returns the absolute id of the thread in the current CUDA block.
-LIBC_INLINE uint64_t get_thread_id() {
- return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +
- get_num_threads_x() * get_num_threads_y() * get_thread_id_z();
-}
-
-/// Returns the size of a CUDA warp, always 32 on NVIDIA hardware.
-LIBC_INLINE uint32_t get_lane_size() { return 32; }
-
-/// Returns the id of the thread inside of a CUDA warp executing together.
-[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
- return __nvvm_read_ptx_sreg_laneid();
-}
-
-/// Returns the bit-mask of active threads in the current warp.
-[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
- return __nvvm_activemask();
-}
-
-/// Copies the value from the first active thread in the warp to the rest.
-[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask,
- uint32_t x) {
- uint32_t mask = static_cast<uint32_t>(lane_mask);
- uint32_t id = __builtin_ffs(mask) - 1;
- return __nvvm_shfl_sync_idx_i32(mask, x, id, get_lane_size() - 1);
-}
-
-/// Returns a bitmask of threads in the current lane for which \p x is true.
-[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
- uint32_t mask = static_cast<uint32_t>(lane_mask);
- return __nvvm_vote_ballot_sync(mask, x);
-}
-
-/// Waits for all the threads in the block to converge and issues a fence.
-[[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); }
-
-/// Waits for all pending memory operations to complete in program order.
-[[clang::convergent]] LIBC_INLINE void memory_fence() { __nvvm_membar_sys(); }
-
-/// Waits for all threads in the warp to reconverge for independent scheduling.
-[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) {
- __nvvm_bar_warp_sync(static_cast<uint32_t>(mask));
-}
-
-/// Shuffles the the lanes inside the warp according to the given index.
-[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t lane_mask,
- uint32_t idx, uint32_t x) {
- uint32_t mask = static_cast<uint32_t>(lane_mask);
- uint32_t bitmask = (mask >> idx) & 1;
- return -bitmask & __nvvm_shfl_sync_idx_i32(mask, x, idx, get_lane_size() - 1);
-}
-
-/// Returns the current value of the GPU's processor clock.
-LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
-
-/// Returns a global fixed-frequency timer at nanosecond frequency.
-LIBC_INLINE uint64_t fixed_frequency_clock() {
- return __builtin_readsteadycounter();
-}
-
-/// Terminates execution of the calling thread.
-[[noreturn]] LIBC_INLINE void end_program() { __nvvm_exit(); }
-
-/// Returns a unique identifier for the process cluster the current warp is
-/// executing on. Here we use the identifier for the symmetric multiprocessor.
-LIBC_INLINE uint32_t get_cluster_id() { return __nvvm_read_ptx_sreg_smid(); }
-
-} // namespace gpu
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif
diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index ae52e7a088ad..e138c84c0cb2 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -9,48 +9,108 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
#define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
+#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
-#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
-#include "amdgpu/utils.h"
-#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
-#include "nvptx/utils.h"
-#else
-#include "generic/utils.h"
+#if !__has_include(<gpuintrin.h>)
+#error "Unsupported compiler"
#endif
+#include <gpuintrin.h>
+
namespace LIBC_NAMESPACE_DECL {
namespace gpu {
-/// Get the first active thread inside the lane.
-LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
- return __builtin_ffsll(lane_mask) - 1;
+
+template <typename T> using Private = __gpu_private T;
+template <typename T> using Constant = __gpu_constant T;
+template <typename T> using Local = __gpu_local T;
+template <typename T> using Global = __gpu_local T;
+
+LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); }
+
+LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); }
+
+LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); }
+
+LIBC_INLINE uint64_t get_num_blocks() {
+ return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
+}
+
+LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); }
+
+LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); }
+
+LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); }
+
+LIBC_INLINE uint64_t get_block_id() {
+ return get_block_id_x() + get_num_blocks_x() * get_block_id_y() +
+ get_num_blocks_x() * get_num_blocks_y() * get_block_id_z();
+}
+
+LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); }
+
+LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); }
+
+LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); }
+
+LIBC_INLINE uint64_t get_num_threads() {
+ return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
+}
+
+LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); }
+
+LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); }
+
+LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); }
+
+LIBC_INLINE uint64_t get_thread_id() {
+ return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +
+ get_num_threads_x() * get_num_threads_y() * get_thread_id_z();
+}
+
+LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); }
+
+LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); }
+
+LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); }
+
+LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
+ return __gpu_read_first_lane_u32(lane_mask, x);
+}
+
+LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
+ return __gpu_ballot(lane_mask, x);
+}
+
+LIBC_INLINE void sync_threads() { __gpu_sync_threads(); }
+
+LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); }
+
+LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x) {
+ return __gpu_shuffle_idx_u32(lane_mask, idx, x);
}
-/// Conditional that is only true for a single thread in a lane.
+[[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); }
+
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
- return gpu::get_lane_id() == get_first_lane_id(lane_mask);
+ return __gpu_is_first_in_lane(lane_mask);
}
-/// Gets the sum of all lanes inside the warp or wavefront.
LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
- for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) {
- uint32_t index = step + gpu::get_lane_id();
- x += gpu::shuffle(lane_mask, index, x);
- }
- return gpu::broadcast_value(lane_mask, x);
+ return __gpu_lane_sum_u32(lane_mask, x);
}
-/// Gets the accumulator scan of the threads in the warp or wavefront.
LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
- for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) {
- uint32_t index = gpu::get_lane_id() - step;
- uint32_t bitmask = gpu::get_lane_id() >= step;
- x += -bitmask & gpu::shuffle(lane_mask, index, x);
- }
- return x;
+ return __gpu_lane_scan_u32(lane_mask, x);
+}
+
+LIBC_INLINE uint64_t fixed_frequency_clock() {
+ return __builtin_readsteadycounter();
}
+LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
+
} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/fixedvector.h b/libc/src/__support/fixedvector.h
index 7ac0c230f9c5..34601f86dc01 100644
--- a/libc/src/__support/fixedvector.h
+++ b/libc/src/__support/fixedvector.h
@@ -10,9 +10,10 @@
#define LLVM_LIBC_SRC___SUPPORT_FIXEDVECTOR_H
#include "src/__support/CPP/array.h"
-
#include "src/__support/CPP/iterator.h"
+#include "src/__support/libc_assert.h"
#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memset.h"
namespace LIBC_NAMESPACE_DECL {
@@ -23,27 +24,32 @@ template <typename T, size_t CAPACITY> class FixedVector {
size_t item_count = 0;
public:
- constexpr FixedVector() = default;
+ LIBC_INLINE constexpr FixedVector() = default;
using iterator = typename cpp::array<T, CAPACITY>::iterator;
- constexpr FixedVector(iterator begin, iterator end) : store{}, item_count{} {
+ LIBC_INLINE constexpr FixedVector(iterator begin, iterator end)
+ : store{}, item_count{} {
+ LIBC_ASSERT(begin + CAPACITY >= end);
for (; begin != end; ++begin)
push_back(*begin);
}
using const_iterator = typename cpp::array<T, CAPACITY>::const_iterator;
- constexpr FixedVector(const_iterator begin, const_iterator end)
+ LIBC_INLINE constexpr FixedVector(const_iterator begin, const_iterator end)
: store{}, item_count{} {
+ LIBC_ASSERT(begin + CAPACITY >= end);
for (; begin != end; ++begin)
push_back(*begin);
}
- constexpr FixedVector(size_t count, const T &value) : store{}, item_count{} {
+ LIBC_INLINE constexpr FixedVector(size_t count, const T &value)
+ : store{}, item_count{} {
+ LIBC_ASSERT(count <= CAPACITY);
for (size_t i = 0; i < count; ++i)
push_back(value);
}
- constexpr bool push_back(const T &obj) {
+ LIBC_INLINE constexpr bool push_back(const T &obj) {
if (item_count == CAPACITY)
return false;
store[item_count] = obj;
@@ -51,27 +57,43 @@ public:
return true;
}
- constexpr const T &back() const { return store[item_count - 1]; }
+ LIBC_INLINE constexpr const T &back() const {
+ LIBC_ASSERT(!empty());
+ return store[item_count - 1];
+ }
- constexpr T &back() { return store[item_count - 1]; }
+ LIBC_INLINE constexpr T &back() {
+ LIBC_ASSERT(!empty());
+ return store[item_count - 1];
+ }
- constexpr bool pop_back() {
+ LIBC_INLINE constexpr bool pop_back() {
if (item_count == 0)
return false;
+ inline_memset(&store[item_count - 1], 0, sizeof(T));
--item_count;
return true;
}
- constexpr T &operator[](size_t idx) { return store[idx]; }
+ LIBC_INLINE constexpr T &operator[](size_t idx) {
+ LIBC_ASSERT(idx < item_count);
+ return store[idx];
+ }
- constexpr const T &operator[](size_t idx) const { return store[idx]; }
+ LIBC_INLINE constexpr const T &operator[](size_t idx) const {
+ LIBC_ASSERT(idx < item_count);
+ return store[idx];
+ }
- constexpr bool empty() const { return item_count == 0; }
+ LIBC_INLINE constexpr bool empty() const { return item_count == 0; }
- constexpr size_t size() const { return item_count; }
+ LIBC_INLINE constexpr size_t size() const { return item_count; }
// Empties the store for all practical purposes.
- constexpr void reset() { item_count = 0; }
+ LIBC_INLINE constexpr void reset() {
+ inline_memset(store.data(), 0, sizeof(T) * item_count);
+ item_count = 0;
+ }
// This static method does not free up the resources held by |store|,
// say by calling `free` or something similar. It just does the equivalent
@@ -81,7 +103,9 @@ public:
// dynamically allocated storate. So, the `destroy` method like this
// matches the `destroy` API of those other data structures so that users
// can easily swap one data structure for the other.
- static void destroy(FixedVector<T, CAPACITY> *store) { store->reset(); }
+ LIBC_INLINE static void destroy(FixedVector<T, CAPACITY> *store) {
+ store->reset();
+ }
using reverse_iterator = typename cpp::array<T, CAPACITY>::reverse_iterator;
LIBC_INLINE constexpr reverse_iterator rbegin() {
diff --git a/libc/src/__support/threads/thread.cpp b/libc/src/__support/threads/thread.cpp
index dad4f75f092e..6f6b75be5766 100644
--- a/libc/src/__support/threads/thread.cpp
+++ b/libc/src/__support/threads/thread.cpp
@@ -117,7 +117,9 @@ public:
int add_callback(AtExitCallback *callback, void *obj) {
cpp::lock_guard lock(mtx);
- return callback_list.push_back({callback, obj});
+ if (callback_list.push_back({callback, obj}))
+ return 0;
+ return -1;
}
void call() {