diff options
Diffstat (limited to 'offload/DeviceRTL/include')
| -rw-r--r-- | offload/DeviceRTL/include/Allocator.h | 45 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Configuration.h | 68 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Debug.h | 44 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/DeviceTypes.h | 166 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/DeviceUtils.h | 96 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Interface.h | 366 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/LibC.h | 23 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Mapping.h | 108 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Profiling.h | 21 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/State.h | 377 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Synchronization.h | 225 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/Workshare.h | 26 | ||||
| -rw-r--r-- | offload/DeviceRTL/include/generated_microtask_cases.gen | 797 |
13 files changed, 0 insertions, 2362 deletions
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h deleted file mode 100644 index dc4d029ed75f..000000000000 --- a/offload/DeviceRTL/include/Allocator.h +++ /dev/null @@ -1,45 +0,0 @@ -//===-------- Allocator.h - OpenMP memory allocator interface ---- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_ALLOCATOR_H -#define OMPTARGET_ALLOCATOR_H - -#include "DeviceTypes.h" - -// Forward declaration. -struct KernelEnvironmentTy; - -namespace ompx { - -namespace allocator { - -static uint64_t constexpr ALIGNMENT = 16; - -/// Initialize the allocator according to \p KernelEnvironment -void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment); - -/// Allocate \p Size bytes. -[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void * -alloc(uint64_t Size); - -/// Free the allocation pointed to by \p Ptr. -void free(void *Ptr); - -} // namespace allocator - -} // namespace ompx - -extern "C" { -void *malloc(size_t Size); -void free(void *Ptr); -} - -#endif diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h deleted file mode 100644 index 95408933dd86..000000000000 --- a/offload/DeviceRTL/include/Configuration.h +++ /dev/null @@ -1,68 +0,0 @@ -//===--- Configuration.h - OpenMP device configuration interface -- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// API to query the global (constant) device environment. -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_CONFIGURATION_H -#define OMPTARGET_CONFIGURATION_H - -#include "Shared/Environment.h" - -#include "DeviceTypes.h" - -namespace ompx { -namespace config { - -/// Return the number of devices in the system, same number as returned on the -/// host by omp_get_num_devices. -uint32_t getNumDevices(); - -/// Return the device number in the system for omp_get_device_num. -uint32_t getDeviceNum(); - -/// Return the user chosen debug level. -uint32_t getDebugKind(); - -/// Return if teams oversubscription is assumed -uint32_t getAssumeTeamsOversubscription(); - -/// Return if threads oversubscription is assumed -uint32_t getAssumeThreadsOversubscription(); - -/// Return the amount of dynamic shared memory that was allocated at launch. -uint64_t getDynamicMemorySize(); - -/// Returns the cycles per second of the device's fixed frequency clock. -uint64_t getClockFrequency(); - -/// Returns the pointer to the beginning of the indirect call table. -void *getIndirectCallTablePtr(); - -/// Returns the size of the indirect call table. -uint64_t getIndirectCallTableSize(); - -/// Returns the size of the indirect call table. -uint64_t getHardwareParallelism(); - -/// Return if debugging is enabled for the given debug kind. -bool isDebugMode(DeviceDebugKind Level); - -/// Indicates if this kernel may require thread-specific states, or if it was -/// explicitly disabled by the user. -bool mayUseThreadStates(); - -/// Indicates if this kernel may require data environments for nested -/// parallelism, or if it was explicitly disabled by the user. -bool mayUseNestedParallelism(); - -} // namespace config -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/Debug.h b/offload/DeviceRTL/include/Debug.h deleted file mode 100644 index 98d0fa498d95..000000000000 --- a/offload/DeviceRTL/include/Debug.h +++ /dev/null @@ -1,44 +0,0 @@ -//===-------- Debug.h ---- Debug utilities ------------------------ C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_DEBUG_H -#define OMPTARGET_DEVICERTL_DEBUG_H - -#include "Configuration.h" -#include "LibC.h" - -/// Assertion -/// -/// { -extern "C" { -void __assert_assume(bool condition); -void __assert_fail(const char *expr, const char *file, unsigned line, - const char *function); -void __assert_fail_internal(const char *expr, const char *msg, const char *file, - unsigned line, const char *function); -} - -#define ASSERT(expr, msg) \ - { \ - if (config::isDebugMode(DeviceDebugKind::Assertion) && !(expr)) \ - __assert_fail_internal(#expr, msg, __FILE__, __LINE__, \ - __PRETTY_FUNCTION__); \ - else \ - __assert_assume(expr); \ - } -#define UNREACHABLE(msg) \ - printf(msg); \ - __builtin_trap(); \ - __builtin_unreachable(); - -///} - -#endif diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h deleted file mode 100644 index 2e5d92380f04..000000000000 --- a/offload/DeviceRTL/include/DeviceTypes.h +++ /dev/null @@ -1,166 +0,0 @@ -//===---------- DeviceTypes.h - OpenMP types ---------------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_TYPES_H -#define OMPTARGET_TYPES_H - -#include <gpuintrin.h> -#include <stddef.h> -#include <stdint.h> - -template <typename T> using Private = __gpu_private T; -template <typename T> using Constant = __gpu_constant T; -template <typename T> using Local = __gpu_local T; -template <typename T> using Global = __gpu_local T; - -enum omp_proc_bind_t { - omp_proc_bind_false = 0, - omp_proc_bind_true = 1, - omp_proc_bind_master = 2, - omp_proc_bind_close = 3, - omp_proc_bind_spread = 4 -}; - -enum omp_sched_t { - omp_sched_static = 1, /* chunkSize >0 */ - omp_sched_dynamic = 2, /* chunkSize >0 */ - omp_sched_guided = 3, /* chunkSize >0 */ - omp_sched_auto = 4, /* no chunkSize */ -}; - -enum kmp_sched_t { - kmp_sched_static_chunk = 33, - kmp_sched_static_nochunk = 34, - kmp_sched_dynamic = 35, - kmp_sched_guided = 36, - kmp_sched_runtime = 37, - kmp_sched_auto = 38, - - kmp_sched_static_balanced_chunk = 45, - - kmp_sched_static_ordered = 65, - kmp_sched_static_nochunk_ordered = 66, - kmp_sched_dynamic_ordered = 67, - kmp_sched_guided_ordered = 68, - kmp_sched_runtime_ordered = 69, - kmp_sched_auto_ordered = 70, - - kmp_sched_distr_static_chunk = 91, - kmp_sched_distr_static_nochunk = 92, - kmp_sched_distr_static_chunk_sched_static_chunkone = 93, - - kmp_sched_default = kmp_sched_static_nochunk, - kmp_sched_unordered_first = kmp_sched_static_chunk, - kmp_sched_unordered_last = kmp_sched_auto, - kmp_sched_ordered_first = kmp_sched_static_ordered, - kmp_sched_ordered_last = kmp_sched_auto_ordered, - kmp_sched_distribute_first = kmp_sched_distr_static_chunk, - kmp_sched_distribute_last = - kmp_sched_distr_static_chunk_sched_static_chunkone, - - /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. - * Since we need to distinguish the three possible cases (no modifier, - * monotonic modifier, nonmonotonic modifier), we need separate bits for - * each modifier. The absence of monotonic does not imply nonmonotonic, - * especially since 4.5 says that the behaviour of the "no modifier" case - * is implementation defined in 4.5, but will become "nonmonotonic" in 5.0. - * - * Since we're passing a full 32 bit value, we can use a couple of high - * bits for these flags; out of paranoia we avoid the sign bit. - * - * These modifiers can be or-ed into non-static schedules by the compiler - * to pass the additional information. They will be stripped early in the - * processing in __kmp_dispatch_init when setting up schedules, so - * most of the code won't ever see schedules with these bits set. - */ - kmp_sched_modifier_monotonic = (1 << 29), - /**< Set if the monotonic schedule modifier was present */ - kmp_sched_modifier_nonmonotonic = (1 << 30), -/**< Set if the nonmonotonic schedule modifier was present */ - -#define SCHEDULE_WITHOUT_MODIFIERS(s) \ - (enum kmp_sched_t)( \ - (s) & ~(kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic)) -#define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sched_modifier_monotonic) != 0) -#define SCHEDULE_HAS_NONMONOTONIC(s) \ - (((s) & kmp_sched_modifier_nonmonotonic) != 0) -#define SCHEDULE_HAS_NO_MODIFIERS(s) \ - (((s) & (kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic)) == \ - 0) - -}; - -struct TaskDescriptorTy; -using TaskFnTy = int32_t (*)(int32_t global_tid, TaskDescriptorTy *taskDescr); -struct TaskDescriptorTy { - void *Payload; - TaskFnTy TaskFn; -}; - -using LaneMaskTy = uint64_t; - -namespace lanes { -enum : LaneMaskTy { All = ~(LaneMaskTy)0 }; -} // namespace lanes - -/// The ident structure that describes a source location. The struct is -/// identical to the one in the kmp.h file. We maintain the same data structure -/// for compatibility. -struct IdentTy { - int32_t reserved_1; /**< might be used in Fortran; see above */ - int32_t flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC - identifies this union member */ - int32_t reserved_2; /**< not really used in Fortran any more; see above */ - int32_t reserved_3; /**< source[4] in Fortran, do not use for C++ */ - char const *psource; /**< String describing the source location. - The string is composed of semi-colon separated fields - which describe the source file, the function and a pair - of line numbers that delimit the construct. */ -}; - -using __kmpc_impl_lanemask_t = LaneMaskTy; - -using ParallelRegionFnTy = void *; - -using CriticalNameTy = int32_t[8]; - -struct omp_lock_t { - void *Lock; -}; - -using InterWarpCopyFnTy = void (*)(void *src, int32_t warp_num); -using ShuffleReductFnTy = void (*)(void *rhsData, int16_t lane_id, - int16_t lane_offset, int16_t shortCircuit); -using ListGlobalFnTy = void (*)(void *buffer, int idx, void *reduce_data); - -/// Macros for allocating variables in different address spaces. -///{ - -// Follows the pattern in interface.h -typedef enum omp_allocator_handle_t { - omp_null_allocator = 0, - omp_default_mem_alloc = 1, - omp_large_cap_mem_alloc = 2, - omp_const_mem_alloc = 3, - omp_high_bw_mem_alloc = 4, - omp_low_lat_mem_alloc = 5, - omp_cgroup_mem_alloc = 6, - omp_pteam_mem_alloc = 7, - omp_thread_mem_alloc = 8, - KMP_ALLOCATOR_MAX_HANDLE = ~(0LU) -} omp_allocator_handle_t; - -#define __PRAGMA(STR) _Pragma(#STR) -#define OMP_PRAGMA(STR) __PRAGMA(omp STR) - -///} - -#endif diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h deleted file mode 100644 index b92514ee9838..000000000000 --- a/offload/DeviceRTL/include/DeviceUtils.h +++ /dev/null @@ -1,96 +0,0 @@ -//===--- DeviceUtils.h - OpenMP device runtime utility functions -- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_DEVICE_UTILS_H -#define OMPTARGET_DEVICERTL_DEVICE_UTILS_H - -#include "DeviceTypes.h" -#include "Shared/Utils.h" - -namespace utils { - -template <typename T> struct type_identity { - using type = T; -}; - -template <typename T, T v> struct integral_constant { - inline static constexpr T value = v; -}; - -/// Freestanding SFINAE helpers. -template <class T> struct remove_cv : type_identity<T> {}; -template <class T> struct remove_cv<const T> : type_identity<T> {}; -template <class T> struct remove_cv<volatile T> : type_identity<T> {}; -template <class T> struct remove_cv<const volatile T> : type_identity<T> {}; -template <class T> using remove_cv_t = typename remove_cv<T>::type; - -using true_type = integral_constant<bool, true>; -using false_type = integral_constant<bool, false>; - -template <typename T, typename U> struct is_same : false_type {}; -template <typename T> struct is_same<T, T> : true_type {}; -template <typename T, typename U> -inline constexpr bool is_same_v = is_same<T, U>::value; - -template <typename T> struct is_floating_point { - inline static constexpr bool value = - is_same_v<remove_cv_t<T>, float> || is_same_v<remove_cv_t<T>, double>; -}; -template <typename T> -inline constexpr bool is_floating_point_v = is_floating_point<T>::value; - -template <bool B, typename T = void> struct enable_if; -template <typename T> struct enable_if<true, T> : type_identity<T> {}; -template <bool B, typename T = void> -using enable_if_t = typename enable_if<B, T>::type; - -template <class T> struct remove_addrspace : type_identity<T> {}; -template <class T, int N> -struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {}; -template <class T> -using remove_addrspace_t = typename remove_addrspace<T>::type; - -template <typename To, typename From> inline To bitCast(From V) { - static_assert(sizeof(To) == sizeof(From), "Bad conversion"); - return __builtin_bit_cast(To, V); -} - -/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread -/// is identified by \p Mask. -int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); - -int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width); - -int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width); - -uint64_t ballotSync(uint64_t Mask, int32_t Pred); - -/// Return \p LowBits and \p HighBits packed into a single 64 bit value. -uint64_t pack(uint32_t LowBits, uint32_t HighBits); - -/// Unpack \p Val into \p LowBits and \p HighBits. -void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits); - -/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)). -bool isSharedMemPtr(void *Ptr); - -/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)). -bool isThreadLocalMemPtr(void *Ptr); - -/// A pointer variable that has by design an `undef` value. Use with care. -[[clang::loader_uninitialized]] static void *const UndefPtr; - -#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true) -#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false) - -} // namespace utils - -#endif diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h deleted file mode 100644 index c4bfaaa2404b..000000000000 --- a/offload/DeviceRTL/include/Interface.h +++ /dev/null @@ -1,366 +0,0 @@ -//===-------- Interface.h - OpenMP interface ---------------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_INTERFACE_H -#define OMPTARGET_DEVICERTL_INTERFACE_H - -#include "Shared/Environment.h" - -#include "DeviceTypes.h" - -/// External API -/// -///{ - -extern "C" { - -/// ICV: dyn-var, constant 0 -/// -/// setter: ignored. -/// getter: returns 0. -/// -///{ -void omp_set_dynamic(int); -int omp_get_dynamic(void); -///} - -/// ICV: nthreads-var, integer -/// -/// scope: data environment -/// -/// setter: ignored. -/// getter: returns false. -/// -/// implementation notes: -/// -/// -///{ -void omp_set_num_threads(int); -int omp_get_max_threads(void); -///} - -/// ICV: thread-limit-var, computed -/// -/// getter: returns thread limited defined during launch. -/// -///{ -int omp_get_thread_limit(void); -///} - -/// ICV: max-active-level-var, constant 1 -/// -/// setter: ignored. -/// getter: returns 1. -/// -///{ -void omp_set_max_active_levels(int); -int omp_get_max_active_levels(void); -///} - -/// ICV: places-partition-var -/// -/// -///{ -///} - -/// ICV: active-level-var, 0 or 1 -/// -/// getter: returns 0 or 1. -/// -///{ -int omp_get_active_level(void); -///} - -/// ICV: level-var -/// -/// getter: returns parallel region nesting -/// -///{ -int omp_get_level(void); -///} - -/// ICV: run-sched-var -/// -/// -///{ -void omp_set_schedule(omp_sched_t, int); -void omp_get_schedule(omp_sched_t *, int *); -///} - -/// TODO this is incomplete. -int omp_get_num_threads(void); -int omp_get_thread_num(void); -void omp_set_nested(int); - -int omp_get_nested(void); - -void omp_set_max_active_levels(int Level); - -int omp_get_max_active_levels(void); - -omp_proc_bind_t omp_get_proc_bind(void); - -int omp_get_num_places(void); - -int omp_get_place_num_procs(int place_num); - -void omp_get_place_proc_ids(int place_num, int *ids); - -int omp_get_place_num(void); - -int omp_get_partition_num_places(void); - -void omp_get_partition_place_nums(int *place_nums); - -int omp_get_cancellation(void); - -void omp_set_default_device(int deviceId); - -int omp_get_default_device(void); - -int omp_get_num_devices(void); - -int omp_get_device_num(void); - -int omp_get_num_teams(void); - -int omp_get_team_num(); - -int omp_get_initial_device(void); - -void *llvm_omp_target_dynamic_shared_alloc(); - -/// Synchronization -/// -///{ -void omp_init_lock(omp_lock_t *Lock); - -void omp_destroy_lock(omp_lock_t *Lock); - -void omp_set_lock(omp_lock_t *Lock); - -void omp_unset_lock(omp_lock_t *Lock); - -int omp_test_lock(omp_lock_t *Lock); -///} - -/// Tasking -/// -///{ -int omp_in_final(void); - -int omp_get_max_task_priority(void); -///} - -/// Misc -/// -///{ -double omp_get_wtick(void); - -double omp_get_wtime(void); -///} -} - -extern "C" { -/// Allocate \p Bytes in "shareable" memory and return the address. Needs to be -/// called balanced with __kmpc_free_shared like a stack (push/pop). Can be -/// called by any thread, allocation happens *per thread*. -void *__kmpc_alloc_shared(uint64_t Bytes); - -/// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like -/// a stack (push/pop). Can be called by any thread. \p Ptr has to be the -/// allocated by __kmpc_alloc_shared by the same thread. -void __kmpc_free_shared(void *Ptr, uint64_t Bytes); - -/// Get a pointer to the memory buffer containing dynamically allocated shared -/// memory configured at launch. -void *__kmpc_get_dynamic_shared(); - -/// Allocate sufficient space for \p NumArgs sequential `void*` and store the -/// allocation address in \p GlobalArgs. -/// -/// Called by the main thread prior to a parallel region. -/// -/// We also remember it in GlobalArgsPtr to ensure the worker threads and -/// deallocation function know the allocation address too. -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs); - -/// Deallocate the memory allocated by __kmpc_begin_sharing_variables. -/// -/// Called by the main thread after a parallel region. -void __kmpc_end_sharing_variables(); - -/// Store the allocation address obtained via __kmpc_begin_sharing_variables in -/// \p GlobalArgs. -/// -/// Called by the worker threads in the parallel region (function). -void __kmpc_get_shared_variables(void ***GlobalArgs); - -/// External interface to get the thread ID. -uint32_t __kmpc_get_hardware_thread_id_in_block(); - -/// External interface to get the number of threads. -uint32_t __kmpc_get_hardware_num_threads_in_block(); - -/// External interface to get the warp size. -uint32_t __kmpc_get_warp_size(); - -/// Kernel -/// -///{ -// Forward declaration -struct KernelEnvironmentTy; - -int8_t __kmpc_is_spmd_exec_mode(); - -int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment, - KernelLaunchEnvironmentTy &KernelLaunchEnvironment); - -void __kmpc_target_deinit(); - -///} - -/// Reduction -/// -///{ -void *__kmpc_reduction_get_fixed_buffer(); - -int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc, - uint64_t reduce_data_size, - void *reduce_data, - ShuffleReductFnTy shflFct, - InterWarpCopyFnTy cpyFct); - -int32_t __kmpc_nvptx_teams_reduce_nowait_v2( - IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records, - uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct, - InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct, - ListGlobalFnTy glcpyFct, ListGlobalFnTy glredFct); -///} - -/// Synchronization -/// -///{ -void __kmpc_ordered(IdentTy *Loc, int32_t TId); - -void __kmpc_end_ordered(IdentTy *Loc, int32_t TId); - -int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId); - -void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId); - -void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId); - -void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId); - -int32_t __kmpc_master(IdentTy *Loc, int32_t TId); - -void __kmpc_end_master(IdentTy *Loc, int32_t TId); - -int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter); - -void __kmpc_end_masked(IdentTy *Loc, int32_t TId); - -int32_t __kmpc_single(IdentTy *Loc, int32_t TId); - -void __kmpc_end_single(IdentTy *Loc, int32_t TId); - -void __kmpc_flush(IdentTy *Loc); - -uint64_t __kmpc_warp_active_thread_mask(void); - -void __kmpc_syncwarp(uint64_t Mask); - -void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name); - -void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name); -///} - -/// Parallelism -/// -///{ -/// TODO -void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn); - -/// TODO -bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn); - -/// TODO -void __kmpc_kernel_end_parallel(); - -/// TODO -void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind); - -/// TODO -void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId, int32_t NumTeams, - int32_t ThreadLimit); - -/// TODO -uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t); - -///} - -/// Tasking -/// -///{ -TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t, - size_t TaskSizeInclPrivateValues, - size_t SharedValuesSize, - TaskFnTy TaskFn); - -int32_t __kmpc_omp_task(IdentTy *Loc, uint32_t TId, - TaskDescriptorTy *TaskDescriptor); - -int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId, - TaskDescriptorTy *TaskDescriptor, int32_t, - void *, int32_t, void *); - -void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId, - TaskDescriptorTy *TaskDescriptor); - -void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId, - TaskDescriptorTy *TaskDescriptor); - -void __kmpc_omp_wait_deps(IdentTy *Loc, uint32_t TId, int32_t, void *, int32_t, - void *); - -void __kmpc_taskgroup(IdentTy *Loc, uint32_t TId); - -void __kmpc_end_taskgroup(IdentTy *Loc, uint32_t TId); - -int32_t __kmpc_omp_taskyield(IdentTy *Loc, uint32_t TId, int); - -int32_t __kmpc_omp_taskwait(IdentTy *Loc, uint32_t TId); - -void __kmpc_taskloop(IdentTy *Loc, uint32_t TId, - TaskDescriptorTy *TaskDescriptor, int, - uint64_t *LowerBound, uint64_t *UpperBound, int64_t, int, - int32_t, uint64_t, void *); -///} - -/// Misc -/// -///{ -int32_t __kmpc_cancellationpoint(IdentTy *Loc, int32_t TId, int32_t CancelVal); - -int32_t __kmpc_cancel(IdentTy *Loc, int32_t TId, int32_t CancelVal); -///} - -/// Shuffle -/// -///{ -int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size); -int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size); - -///} -} - -#endif diff --git a/offload/DeviceRTL/include/LibC.h b/offload/DeviceRTL/include/LibC.h deleted file mode 100644 index 94b5e6519606..000000000000 --- a/offload/DeviceRTL/include/LibC.h +++ /dev/null @@ -1,23 +0,0 @@ -//===--------- LibC.h - Simple implementation of libc functions --- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_LIBC_H -#define OMPTARGET_LIBC_H - -#include "DeviceTypes.h" - -namespace ompx { - -int printf(const char *Format, ...); - -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/Mapping.h b/offload/DeviceRTL/include/Mapping.h deleted file mode 100644 index 8ba018b5314a..000000000000 --- a/offload/DeviceRTL/include/Mapping.h +++ /dev/null @@ -1,108 +0,0 @@ -//===--------- Mapping.h - OpenMP device runtime mapping helpers -- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_MAPPING_H -#define OMPTARGET_MAPPING_H - -#include "DeviceTypes.h" - -namespace ompx { - -namespace mapping { - -enum { - DIM_X = __GPU_X_DIM, - DIM_Y = __GPU_Y_DIM, - DIM_Z = __GPU_Z_DIM, -}; - -inline constexpr uint32_t MaxThreadsPerTeam = 1024; - -/// Initialize the mapping machinery. -void init(bool IsSPMD); - -/// Return true if the kernel is executed in SPMD mode. -bool isSPMDMode(); - -/// Return true if the kernel is executed in generic mode. -bool isGenericMode(); - -/// Return true if the executing thread is the main thread in generic mode. -/// These functions will lookup state and it is required that that is OK for the -/// thread and location. See also `isInitialThreadInLevel0` for a stateless -/// alternative for certain situations, e.g. during initialization. -bool isMainThreadInGenericMode(); -bool isMainThreadInGenericMode(bool IsSPMD); - -/// Return true if this thread is the initial thread in parallel level 0. -/// -/// The thread for which this returns true should be used for single threaded -/// initialization tasks. We pick a special thread to ensure there are no -/// races between the initialization and the first read of initialized state. -bool isInitialThreadInLevel0(bool IsSPMD); - -/// Return true if the executing thread has the lowest Id of the active threads -/// in the warp. -bool isLeaderInWarp(); - -/// Return a mask describing all active threads in the warp. -LaneMaskTy activemask(); - -/// Return a mask describing all threads with a smaller Id in the warp. -LaneMaskTy lanemaskLT(); - -/// Return a mask describing all threads with a larger Id in the warp. -LaneMaskTy lanemaskGT(); - -/// Return the thread Id in the warp, in [0, getWarpSize()). -uint32_t getThreadIdInWarp(); - -/// Return the warp size, thus number of threads in the warp. -uint32_t getWarpSize(); - -/// Return the warp id in the block, in [0, getNumberOfWarpsInBlock()] -uint32_t getWarpIdInBlock(); - -/// Return the number of warps in the block. -uint32_t getNumberOfWarpsInBlock(); - -/// Return the thread Id in the block, in [0, getNumberOfThreadsInBlock(Dim)). -uint32_t getThreadIdInBlock(int32_t Dim = DIM_X); - -/// Return the block size, thus number of threads in the block. -uint32_t getNumberOfThreadsInBlock(int32_t Dim = DIM_X); - -/// Return the block Id in the kernel, in [0, getNumberOfBlocksInKernel(Dim)). -uint32_t getBlockIdInKernel(int32_t Dim = DIM_X); - -/// Return the number of blocks in the kernel. -uint32_t getNumberOfBlocksInKernel(int32_t Dim = DIM_X); - -/// Return the kernel size, thus number of threads in the kernel. -uint32_t getNumberOfThreadsInKernel(); - -/// Return the maximal number of threads in the block usable for a team (= -/// parallel region). -/// -/// Note: The version taking \p IsSPMD mode explicitly can be used during the -/// initialization of the target region, that is before `mapping::isSPMDMode()` -/// can be called by any thread other than the main one. -uint32_t getMaxTeamThreads(); -uint32_t getMaxTeamThreads(bool IsSPMD); - -/// Return the number of processing elements on the device. -uint32_t getNumberOfProcessorElements(); - -} // namespace mapping - -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/Profiling.h b/offload/DeviceRTL/include/Profiling.h deleted file mode 100644 index d99475225412..000000000000 --- a/offload/DeviceRTL/include/Profiling.h +++ /dev/null @@ -1,21 +0,0 @@ -//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_PROFILING_H -#define OMPTARGET_DEVICERTL_PROFILING_H - -extern "C" { -void __llvm_profile_register_function(void *Ptr); -void __llvm_profile_register_names_function(void *Ptr, long int I); -void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2); -} - -#endif diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h deleted file mode 100644 index db396dae6e44..000000000000 --- a/offload/DeviceRTL/include/State.h +++ /dev/null @@ -1,377 +0,0 @@ -//===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_STATE_H -#define OMPTARGET_STATE_H - -#include "Shared/Environment.h" - -#include "Debug.h" -#include "DeviceTypes.h" -#include "DeviceUtils.h" -#include "Mapping.h" - -// Forward declaration. -struct KernelEnvironmentTy; - -namespace ompx { - -namespace memory { - -/// Alloca \p Size bytes in shared memory, if possible, for \p Reason. -/// -/// Note: See the restrictions on __kmpc_alloc_shared for proper usage. -void *allocShared(uint64_t Size, const char *Reason); - -/// Free \p Ptr, allocated via allocShared, for \p Reason. -/// -/// Note: See the restrictions on __kmpc_free_shared for proper usage. -void freeShared(void *Ptr, uint64_t Bytes, const char *Reason); - -/// Alloca \p Size bytes in global memory, if possible, for \p Reason. -void *allocGlobal(uint64_t Size, const char *Reason); - -/// Return a pointer to the dynamic shared memory buffer. -void *getDynamicBuffer(); - -/// Free \p Ptr, allocated via allocGlobal, for \p Reason. -void freeGlobal(void *Ptr, const char *Reason); - -} // namespace memory - -namespace state { - -inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE; - -struct ICVStateTy { - uint32_t NThreadsVar; - uint32_t LevelVar; - uint32_t ActiveLevelVar; - uint32_t Padding0Val; - uint32_t MaxActiveLevelsVar; - uint32_t RunSchedVar; - uint32_t RunSchedChunkVar; - - bool operator==(const ICVStateTy &Other) const; - - void assertEqual(const ICVStateTy &Other) const; -}; - -struct TeamStateTy { - void init(bool IsSPMD); - - bool operator==(const TeamStateTy &) const; - - void assertEqual(TeamStateTy &Other) const; - - /// ICVs - /// - /// Preallocated storage for ICV values that are used if the threads have not - /// set a custom default. The latter is supported but unlikely and slow(er). - /// - ///{ - ICVStateTy ICVState; - ///} - - uint32_t ParallelTeamSize; - uint32_t HasThreadState; - ParallelRegionFnTy ParallelRegionFnVar; -}; - -extern Local<TeamStateTy> TeamState; - -struct ThreadStateTy { - - /// ICVs have preallocated storage in the TeamStateTy which is used if a - /// thread has not set a custom value. The latter is supported but unlikely. - /// When it happens we will allocate dynamic memory to hold the values of all - /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an - /// ICV struct to hold them all. This is slower than alternatives but allows - /// users to pay only for what they use. - /// - state::ICVStateTy ICVState; - - ThreadStateTy *PreviousThreadState; - - void init() { - ICVState = TeamState.ICVState; - PreviousThreadState = nullptr; - } - - void init(ThreadStateTy *PreviousTS) { - ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState; - PreviousThreadState = PreviousTS; - } -}; - -extern Local<ThreadStateTy **> ThreadStates; - -/// Initialize the state machinery. Must be called by all threads. -void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, - KernelLaunchEnvironmentTy &KernelLaunchEnvironment); - -/// Return the kernel and kernel launch environment associated with the current -/// kernel. The former is static and contains compile time information that -/// holds for all instances of the kernel. The latter is dynamic and provides -/// per-launch information. -KernelEnvironmentTy &getKernelEnvironment(); -KernelLaunchEnvironmentTy &getKernelLaunchEnvironment(); - -/// TODO -enum ValueKind { - VK_NThreads, - VK_Level, - VK_ActiveLevel, - VK_MaxActiveLevels, - VK_RunSched, - // --- - VK_RunSchedChunk, - VK_ParallelRegionFn, - VK_ParallelTeamSize, - VK_HasThreadState, -}; - -/// TODO -void enterDataEnvironment(IdentTy *Ident); - -/// TODO -void exitDataEnvironment(); - -/// TODO -struct DateEnvironmentRAII { - DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); } - ~DateEnvironmentRAII() { exitDataEnvironment(); } -}; - -/// TODO -void resetStateForThread(uint32_t TId); - -// FIXME: https://github.com/llvm/llvm-project/issues/123241. -#define lookupForModify32Impl(Member, Ident, ForceTeamState) \ - { \ - if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() || \ - !TeamState.HasThreadState)) \ - return TeamState.ICVState.Member; \ - uint32_t TId = mapping::getThreadIdInBlock(); \ - if (OMP_UNLIKELY(!ThreadStates[TId])) { \ - ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>( \ - memory::allocGlobal(sizeof(ThreadStateTy), \ - "ICV modification outside data environment")); \ - ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!"); \ - TeamState.HasThreadState = true; \ - ThreadStates[TId]->init(); \ - } \ - return ThreadStates[TId]->ICVState.Member; \ - } - -// FIXME: https://github.com/llvm/llvm-project/issues/123241. -#define lookupImpl(Member, ForceTeamState) \ - { \ - auto TId = mapping::getThreadIdInBlock(); \ - if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() && \ - TeamState.HasThreadState && ThreadStates[TId])) \ - return ThreadStates[TId]->ICVState.Member; \ - return TeamState.ICVState.Member; \ - } - -[[gnu::always_inline, gnu::flatten]] inline uint32_t & -lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) { - switch (Kind) { - case state::VK_NThreads: - if (IsReadonly) - lookupImpl(NThreadsVar, ForceTeamState); - lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState); - case state::VK_Level: - if (IsReadonly) - lookupImpl(LevelVar, ForceTeamState); - lookupForModify32Impl(LevelVar, Ident, ForceTeamState); - case state::VK_ActiveLevel: - if (IsReadonly) - lookupImpl(ActiveLevelVar, ForceTeamState); - lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState); - case state::VK_MaxActiveLevels: - if (IsReadonly) - lookupImpl(MaxActiveLevelsVar, ForceTeamState); - lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState); - case state::VK_RunSched: - if (IsReadonly) - lookupImpl(RunSchedVar, ForceTeamState); - lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState); - case state::VK_RunSchedChunk: - if (IsReadonly) - lookupImpl(RunSchedChunkVar, ForceTeamState); - lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState); - case state::VK_ParallelTeamSize: - return TeamState.ParallelTeamSize; - case state::VK_HasThreadState: - return TeamState.HasThreadState; - default: - break; - } - __builtin_unreachable(); -} - -[[gnu::always_inline, gnu::flatten]] inline void *& -lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) { - switch (Kind) { - case state::VK_ParallelRegionFn: - return TeamState.ParallelRegionFnVar; - default: - break; - } - __builtin_unreachable(); -} - -/// A class without actual state used to provide a nice interface to lookup and -/// update ICV values we can declare in global scope. -template <typename Ty, ValueKind Kind> struct Value { - [[gnu::flatten, gnu::always_inline]] operator Ty() { - return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr, - /*ForceTeamState=*/false); - } - - [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) { - set(Other, /*IdentTy=*/nullptr); - return *this; - } - - [[gnu::flatten, gnu::always_inline]] Value &operator++() { - inc(1, /*IdentTy=*/nullptr); - return *this; - } - - [[gnu::flatten, gnu::always_inline]] Value &operator--() { - inc(-1, /*IdentTy=*/nullptr); - return *this; - } - - [[gnu::flatten, gnu::always_inline]] void - assert_eq(const Ty &V, IdentTy *Ident = nullptr, - bool ForceTeamState = false) { - ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr); - } - -private: - [[gnu::flatten, gnu::always_inline]] Ty & - lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) { - Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState); - return t; - } - - [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) { - return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) += - UpdateVal); - } - - [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) { - return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) = - UpdateVal); - } - - template <typename VTy, typename Ty2> friend struct ValueRAII; -}; - -/// A mookup class without actual state used to provide -/// a nice interface to lookup and update ICV values -/// we can declare in global scope. -template <typename Ty, ValueKind Kind> struct PtrValue { - [[gnu::flatten, gnu::always_inline]] operator Ty() { - return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr, - /*ForceTeamState=*/false); - } - - [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) { - set(Other); - return *this; - } - -private: - Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) { - return lookupPtr(Kind, IsReadonly, ForceTeamState); - } - - Ty &set(Ty UpdateVal) { - return (lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr, - /*ForceTeamState=*/false) = UpdateVal); - } - - template <typename VTy, typename Ty2> friend struct ValueRAII; -}; - -template <typename VTy, typename Ty> struct ValueRAII { - ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident, - bool ForceTeamState = false) - : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState) - : (Ty *)utils::UndefPtr), - Val(OldValue), Active(Active) { - if (!Active) - return; - ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!"); - *Ptr = NewValue; - } - ~ValueRAII() { - if (Active) - *Ptr = Val; - } - -private: - Ty *Ptr; - Ty Val; - bool Active; -}; - -/// TODO -inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk; - -/// TODO -inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize; - -/// TODO -inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState; - -/// TODO -inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn> - ParallelRegionFn; - -void runAndCheckState(void(Func(void))); - -void assumeInitialState(bool IsSPMD); - -/// Return the value of the ParallelTeamSize ICV. -int getEffectivePTeamSize(); - -} // namespace state - -namespace icv { - -/// TODO -inline state::Value<uint32_t, state::VK_NThreads> NThreads; - -/// TODO -inline state::Value<uint32_t, state::VK_Level> Level; - -/// The `active-level` describes which of the parallel level counted with the -/// `level-var` is active. There can only be one. -/// -/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0. -inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel; - -/// TODO -inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels; - -/// TODO -inline state::Value<uint32_t, state::VK_RunSched> RunSched; - -} // namespace icv - -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h deleted file mode 100644 index 7e7c8eacb917..000000000000 --- a/offload/DeviceRTL/include/Synchronization.h +++ /dev/null @@ -1,225 +0,0 @@ -//===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H -#define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H - -#include "DeviceTypes.h" -#include "DeviceUtils.h" - -namespace ompx { -namespace atomic { - -enum OrderingTy { - relaxed = __ATOMIC_RELAXED, - acquire = __ATOMIC_ACQUIRE, - release = __ATOMIC_RELEASE, - acq_rel = __ATOMIC_ACQ_REL, - seq_cst = __ATOMIC_SEQ_CST, -}; - -enum MemScopeTy { - system = __MEMORY_SCOPE_SYSTEM, - device = __MEMORY_SCOPE_DEVICE, - workgroup = __MEMORY_SCOPE_WRKGRP, - wavefront = __MEMORY_SCOPE_WVFRNT, - single = __MEMORY_SCOPE_SINGLE, -}; - -/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics. -uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device); - -/// Atomically perform <op> on \p V and \p *Addr with \p Ordering semantics. The -/// result is stored in \p *Addr; -/// { - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc, - atomic::OrderingTy OrderingFail, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false, - OrderingSucc, OrderingFail, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V add(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_add(Address, Val, Ordering, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V load(Ty *Address, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { -#ifdef __NVPTX__ - return __scoped_atomic_fetch_add(Address, V(0), Ordering, MemScope); -#else - return __scoped_atomic_load_n(Address, Ordering, MemScope); -#endif -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -void store(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - __scoped_atomic_store_n(Address, Val, Ordering, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V mul(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - Ty TypedCurrentVal, TypedResultVal, TypedNewVal; - bool Success; - do { - TypedCurrentVal = atomic::load(Address, Ordering); - TypedNewVal = TypedCurrentVal * Val; - Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering, - atomic::relaxed, MemScope); - } while (!Success); - return TypedResultVal; -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<!utils::is_floating_point_v<V>, V> -max(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_max(Address, Val, Ordering, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<utils::is_same_v<V, float>, V> -max(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - if (Val >= 0) - return utils::bitCast<float>(max( - (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope)); - return utils::bitCast<float>(min( - (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope)); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<utils::is_same_v<V, double>, V> -max(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - if (Val >= 0) - return utils::bitCast<double>(max( - (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope)); - return utils::bitCast<double>(min( - (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope)); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<!utils::is_floating_point_v<V>, V> -min(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_min(Address, Val, Ordering, MemScope); -} - -// TODO: Implement this with __atomic_fetch_max and remove the duplication. -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<utils::is_same_v<V, float>, V> -min(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - if (Val >= 0) - return utils::bitCast<float>(min( - (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope)); - return utils::bitCast<float>(max( - (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope)); -} - -// TODO: Implement this with __atomic_fetch_max and remove the duplication. -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -utils::enable_if_t<utils::is_same_v<V, double>, V> -min(Ty *Address, utils::remove_addrspace_t<Ty> Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - if (Val >= 0) - return utils::bitCast<double>(min( - (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope)); - return utils::bitCast<double>(max( - (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope)); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_or(Address, Val, Ordering, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_and(Address, Val, Ordering, MemScope); -} - -template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> -V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - return __scoped_atomic_fetch_xor(Address, Val, Ordering, MemScope); -} - -static inline uint32_t -atomicExchange(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering, - MemScopeTy MemScope = MemScopeTy::device) { - uint32_t R; - __scoped_atomic_exchange(Address, &Val, &R, Ordering, MemScope); - return R; -} - -///} - -} // namespace atomic - -namespace synchronize { - -/// Initialize the synchronization machinery. Must be called by all threads. -void init(bool IsSPMD); - -/// Synchronize all threads in a warp identified by \p Mask. -void warp(LaneMaskTy Mask); - -/// Synchronize all threads in a block and perform a fence before and after the -/// barrier according to \p Ordering. Note that the fence might be part of the -/// barrier. -void threads(atomic::OrderingTy Ordering); - -/// Synchronizing threads is allowed even if they all hit different instances of -/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more -/// restrictive in that it requires all threads to hit the same instance. The -/// noinline is removed by the openmp-opt pass and helps to preserve the -/// information till then. -///{ - -/// Synchronize all threads in a block, they are reaching the same instruction -/// (hence all threads in the block are "aligned"). Also perform a fence before -/// and after the barrier according to \p Ordering. Note that the -/// fence might be part of the barrier if the target offers this. -[[gnu::noinline, omp::assume("ompx_aligned_barrier")]] void -threadsAligned(atomic::OrderingTy Ordering); - -///} - -} // namespace synchronize - -namespace fence { - -/// Memory fence with \p Ordering semantics for the team. -void team(atomic::OrderingTy Ordering); - -/// Memory fence with \p Ordering semantics for the contention group. -void kernel(atomic::OrderingTy Ordering); - -/// Memory fence with \p Ordering semantics for the system. -void system(atomic::OrderingTy Ordering); - -} // namespace fence - -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/Workshare.h b/offload/DeviceRTL/include/Workshare.h deleted file mode 100644 index 554c3271c334..000000000000 --- a/offload/DeviceRTL/include/Workshare.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-------- Workshare.h - OpenMP Workshare interface ------------ C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef OMPTARGET_WORKSHARE_H -#define OMPTARGET_WORKSHARE_H - -namespace ompx { - -namespace workshare { - -/// Initialize the worksharing machinery. -void init(bool IsSPMD); - -} // namespace workshare - -} // namespace ompx - -#endif diff --git a/offload/DeviceRTL/include/generated_microtask_cases.gen b/offload/DeviceRTL/include/generated_microtask_cases.gen deleted file mode 100644 index a05f6da2f84f..000000000000 --- a/offload/DeviceRTL/include/generated_microtask_cases.gen +++ /dev/null @@ -1,797 +0,0 @@ -case 0: -((void (*)(int32_t *, int32_t *))fn)(&global_tid, &bound_tid); -break; -case 1: -((void (*)(int32_t *, int32_t *, void *))fn)(&global_tid, &bound_tid, args[0]); -break; -case 2: -((void (*)(int32_t *, int32_t *, void *, void *))fn)(&global_tid, &bound_tid, - args[0], args[1]); -break; -case 3: -((void (*)(int32_t *, int32_t *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2]); -break; -case 4: -((void (*)(int32_t *, int32_t *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3]); -break; -case 5: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4]); -break; -case 6: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5]); -break; -case 7: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6]); -break; -case 8: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7]); -break; -case 9: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)(&global_tid, &bound_tid, args[0], - args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8]); -break; -case 10: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)(&global_tid, &bound_tid, args[0], - args[1], args[2], args[3], - args[4], args[5], args[6], - args[7], args[8], args[9]); -break; -case 11: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10]); -break; -case 12: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11]); -break; -case 13: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12]); -break; -case 14: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13]); -break; -case 15: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14]); -break; -case 16: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7], args[8], args[9], args[10], args[11], - args[12], args[13], args[14], args[15]); -break; -case 17: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)(&global_tid, &bound_tid, args[0], - args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16]); -break; -case 18: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17]); -break; -case 19: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18]); -break; -case 20: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19]); -break; -case 21: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20]); -break; -case 22: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21]); -break; -case 23: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22]); -break; -case 24: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7], args[8], args[9], args[10], args[11], - args[12], args[13], args[14], args[15], args[16], - args[17], args[18], args[19], args[20], args[21], - args[22], args[23]); -break; -case 25: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)(&global_tid, &bound_tid, args[0], - args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], - args[17], args[18], args[19], args[20], - args[21], args[22], args[23], args[24]); -break; -case 26: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25]); -break; -case 27: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26]); -break; -case 28: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27]); -break; -case 29: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27], args[28]); -break; -case 30: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29]); -break; -case 31: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27], args[28], - args[29], args[30]); -break; -case 32: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7], args[8], args[9], args[10], args[11], - args[12], args[13], args[14], args[15], args[16], - args[17], args[18], args[19], args[20], args[21], - args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], - args[31]); -break; -case 33: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32]); -break; -case 34: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33]); -break; -case 35: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34]); -break; -case 36: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35]); -break; -case 37: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36]); -break; -case 38: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37]); -break; -case 39: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27], args[28], - args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38]); -break; -case 40: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7], args[8], args[9], args[10], args[11], - args[12], args[13], args[14], args[15], args[16], - args[17], args[18], args[19], args[20], args[21], - args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], - args[32], args[33], args[34], args[35], args[36], - args[37], args[38], args[39]); -break; -case 41: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40]); -break; -case 42: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41]); -break; -case 43: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42]); -break; -case 44: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43]); -break; -case 45: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44]); -break; -case 46: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45]); -break; -/// DONE TO HERE -case 47: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27], args[28], - args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], - args[39], args[40], args[41], args[42], args[43], - args[44], args[45], args[46]); -break; -case 48: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47]); -break; -case 49: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48]); -break; -case 50: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49]); -break; -case 51: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50]); -break; -case 52: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51]); -break; -case 53: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52]); -break; -case 54: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53]); -break; -case 55: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54]); -break; -case 56: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)(&global_tid, &bound_tid, args[0], args[1], - args[2], args[3], args[4], args[5], args[6], - args[7], args[8], args[9], args[10], args[11], - args[12], args[13], args[14], args[15], args[16], - args[17], args[18], args[19], args[20], args[21], - args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], - args[32], args[33], args[34], args[35], args[36], - args[37], args[38], args[39], args[40], args[41], - args[42], args[43], args[44], args[45], args[46], - args[47], args[48], args[49], args[50], args[51], - args[52], args[53], args[54], args[55]); -break; -case 57: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56]); -break; -case 58: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57]); -break; -case 59: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57], args[58]); -break; -case 60: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57], args[58], args[59]); -break; -case 61: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57], args[58], args[59], args[60]); -break; -case 62: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57], args[58], args[59], args[60], args[61]); -break; -case 63: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *))fn)(&global_tid, &bound_tid, args[0], args[1], args[2], - args[3], args[4], args[5], args[6], args[7], args[8], - args[9], args[10], args[11], args[12], args[13], - args[14], args[15], args[16], args[17], args[18], - args[19], args[20], args[21], args[22], args[23], - args[24], args[25], args[26], args[27], args[28], - args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], - args[39], args[40], args[41], args[42], args[43], - args[44], args[45], args[46], args[47], args[48], - args[49], args[50], args[51], args[52], args[53], - args[54], args[55], args[56], args[57], args[58], - args[59], args[60], args[61], args[62]); -break; -case 64: -((void (*)(int32_t *, int32_t *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *, void *, void *, void *, void *, void *, void *, - void *, void *))fn)( - &global_tid, &bound_tid, args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], - args[13], args[14], args[15], args[16], args[17], args[18], args[19], - args[20], args[21], args[22], args[23], args[24], args[25], args[26], - args[27], args[28], args[29], args[30], args[31], args[32], args[33], - args[34], args[35], args[36], args[37], args[38], args[39], args[40], - args[41], args[42], args[43], args[44], args[45], args[46], args[47], - args[48], args[49], args[50], args[51], args[52], args[53], args[54], - args[55], args[56], args[57], args[58], args[59], args[60], args[61], - args[62], args[63]); -break; |
