diff options
Diffstat (limited to 'offload/DeviceRTL/src/Kernel.cpp')
| -rw-r--r-- | offload/DeviceRTL/src/Kernel.cpp | 162 |
1 files changed, 0 insertions, 162 deletions
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp deleted file mode 100644 index 467e44a65276..000000000000 --- a/offload/DeviceRTL/src/Kernel.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the kernel entry points for the device. -// -//===----------------------------------------------------------------------===// - -#include "Shared/Environment.h" - -#include "Allocator.h" -#include "Debug.h" -#include "DeviceTypes.h" -#include "Interface.h" -#include "Mapping.h" -#include "State.h" -#include "Synchronization.h" -#include "Workshare.h" - -using namespace ompx; - -// These flags are copied from "llvm/Frontend/OpenMP/OMPDeviceConstants.h" and -// must be kept in-sync. -enum OMPTgtExecModeFlags : unsigned char { - OMP_TGT_EXEC_MODE_BARE = 0, - OMP_TGT_EXEC_MODE_GENERIC = 1 << 0, - OMP_TGT_EXEC_MODE_SPMD = 1 << 1, - OMP_TGT_EXEC_MODE_GENERIC_SPMD = - OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD -}; - -static void -inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, - KernelLaunchEnvironmentTy &KernelLaunchEnvironment) { - // Order is important here. - synchronize::init(IsSPMD); - mapping::init(IsSPMD); - state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment); - allocator::init(IsSPMD, KernelEnvironment); - workshare::init(IsSPMD); -} - -/// Simple generic state machine for worker threads. -static void genericStateMachine(IdentTy *Ident) { - uint32_t TId = mapping::getThreadIdInBlock(); - - do { - ParallelRegionFnTy WorkFn = nullptr; - - // Wait for the signal that we have a new work function. - synchronize::threads(atomic::seq_cst); - - // Retrieve the work function from the runtime. - bool IsActive = __kmpc_kernel_parallel(&WorkFn); - - // If there is nothing more to do, break out of the state machine by - // returning to the caller. - if (!WorkFn) - return; - - if (IsActive) { - ASSERT(!mapping::isSPMDMode(), nullptr); - ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId); - __kmpc_kernel_end_parallel(); - } - - synchronize::threads(atomic::seq_cst); - - } while (true); -} - -extern "C" { - -/// Initialization -/// -/// \param Ident Source location identification, can be NULL. -/// -int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment, - KernelLaunchEnvironmentTy &KernelLaunchEnvironment) { - ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration; - bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD; - bool UseGenericStateMachine = Configuration.UseGenericStateMachine; - if (IsSPMD) { - inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment, - KernelLaunchEnvironment); - synchronize::threadsAligned(atomic::relaxed); - } else { - inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment, - KernelLaunchEnvironment); - // No need to wait since only the main threads will execute user - // code and workers will run into a barrier right away. - } - - if (IsSPMD) { - state::assumeInitialState(IsSPMD); - - // Synchronize to ensure the assertions above are in an aligned region. - // The barrier is eliminated later. - synchronize::threadsAligned(atomic::relaxed); - return -1; - } - - if (mapping::isInitialThreadInLevel0(IsSPMD)) - return -1; - - // Enter the generic state machine if enabled and if this thread can possibly - // be an active worker thread. - // - // The latter check is important for NVIDIA Pascal (but not Volta) and AMD - // GPU. In those cases, a single thread can apparently satisfy a barrier on - // behalf of all threads in the same warp. Thus, it would not be safe for - // other threads in the main thread's warp to reach the first - // synchronize::threads call in genericStateMachine before the main thread - // reaches its corresponding synchronize::threads call: that would permit all - // active worker threads to proceed before the main thread has actually set - // state::ParallelRegionFn, and then they would immediately quit without - // doing any work. mapping::getMaxTeamThreads() does not include any of the - // main thread's warp, so none of its threads can ever be active worker - // threads. - if (UseGenericStateMachine && - mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD)) - genericStateMachine(KernelEnvironment.Ident); - - return mapping::getThreadIdInBlock(); -} - -/// De-Initialization -/// -/// In non-SPMD, this function releases the workers trapped in a state machine -/// and also any memory dynamically allocated by the runtime. -/// -/// \param Ident Source location identification, can be NULL. -/// -void __kmpc_target_deinit() { - bool IsSPMD = mapping::isSPMDMode(); - if (IsSPMD) - return; - - if (mapping::isInitialThreadInLevel0(IsSPMD)) { - // Signal the workers to exit the state machine and exit the kernel. - state::ParallelRegionFn = nullptr; - } else if (!state::getKernelEnvironment() - .Configuration.UseGenericStateMachine) { - // Retrieve the work function just to ensure we always call - // __kmpc_kernel_parallel even if a custom state machine is used. - // TODO: this is not super pretty. The problem is we create the call to - // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it - // is not there yet. Thus, we assume we never reach it from - // __kmpc_target_deinit. That allows us to remove the store in there to - // ParallelRegionFn, which leads to bad results later on. - ParallelRegionFnTy WorkFn = nullptr; - __kmpc_kernel_parallel(&WorkFn); - ASSERT(WorkFn == nullptr, nullptr); - } -} - -int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); } -} |
