summaryrefslogtreecommitdiff
path: root/offload/DeviceRTL/src/Kernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'offload/DeviceRTL/src/Kernel.cpp')
-rw-r--r--offload/DeviceRTL/src/Kernel.cpp157
1 files changed, 157 insertions, 0 deletions
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
new file mode 100644
index 000000000000..95d4c728016d
--- /dev/null
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -0,0 +1,157 @@
+//===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the kernel entry points for the device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Shared/Environment.h"
+
+#include "Allocator.h"
+#include "Debug.h"
+#include "Interface.h"
+#include "Mapping.h"
+#include "State.h"
+#include "Synchronization.h"
+#include "Types.h"
+
+#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
+
+using namespace ompx;
+
+#pragma omp begin declare target device_type(nohost)
+
+static void
+inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ // Order is important here.
+ synchronize::init(IsSPMD);
+ mapping::init(IsSPMD);
+ state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
+ allocator::init(IsSPMD, KernelEnvironment);
+}
+
+/// Simple generic state machine for worker threads.
+static void genericStateMachine(IdentTy *Ident) {
+ uint32_t TId = mapping::getThreadIdInBlock();
+
+ do {
+ ParallelRegionFnTy WorkFn = nullptr;
+
+ // Wait for the signal that we have a new work function.
+ synchronize::threads(atomic::seq_cst);
+
+ // Retrieve the work function from the runtime.
+ bool IsActive = __kmpc_kernel_parallel(&WorkFn);
+
+ // If there is nothing more to do, break out of the state machine by
+ // returning to the caller.
+ if (!WorkFn)
+ return;
+
+ if (IsActive) {
+ ASSERT(!mapping::isSPMDMode(), nullptr);
+ ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId);
+ __kmpc_kernel_end_parallel();
+ }
+
+ synchronize::threads(atomic::seq_cst);
+
+ } while (true);
+}
+
+extern "C" {
+
+/// Initialization
+///
+/// \param Ident Source location identification, can be NULL.
+///
+int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
+ bool IsSPMD = Configuration.ExecMode &
+ llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
+ bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
+ if (IsSPMD) {
+ inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+ KernelLaunchEnvironment);
+ synchronize::threadsAligned(atomic::relaxed);
+ } else {
+ inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
+ KernelLaunchEnvironment);
+ // No need to wait since only the main threads will execute user
+ // code and workers will run into a barrier right away.
+ }
+
+ if (IsSPMD) {
+ state::assumeInitialState(IsSPMD);
+
+ // Synchronize to ensure the assertions above are in an aligned region.
+ // The barrier is eliminated later.
+ synchronize::threadsAligned(atomic::relaxed);
+ return -1;
+ }
+
+ if (mapping::isInitialThreadInLevel0(IsSPMD))
+ return -1;
+
+ // Enter the generic state machine if enabled and if this thread can possibly
+ // be an active worker thread.
+ //
+ // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
+ // GPU. In those cases, a single thread can apparently satisfy a barrier on
+ // behalf of all threads in the same warp. Thus, it would not be safe for
+ // other threads in the main thread's warp to reach the first
+ // synchronize::threads call in genericStateMachine before the main thread
+ // reaches its corresponding synchronize::threads call: that would permit all
+ // active worker threads to proceed before the main thread has actually set
+ // state::ParallelRegionFn, and then they would immediately quit without
+ // doing any work. mapping::getMaxTeamThreads() does not include any of the
+ // main thread's warp, so none of its threads can ever be active worker
+ // threads.
+ if (UseGenericStateMachine &&
+ mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
+ genericStateMachine(KernelEnvironment.Ident);
+
+ return mapping::getThreadIdInBlock();
+}
+
+/// De-Initialization
+///
+/// In non-SPMD, this function releases the workers trapped in a state machine
+/// and also any memory dynamically allocated by the runtime.
+///
+/// \param Ident Source location identification, can be NULL.
+///
+void __kmpc_target_deinit() {
+ bool IsSPMD = mapping::isSPMDMode();
+ if (IsSPMD)
+ return;
+
+ if (mapping::isInitialThreadInLevel0(IsSPMD)) {
+ // Signal the workers to exit the state machine and exit the kernel.
+ state::ParallelRegionFn = nullptr;
+ } else if (!state::getKernelEnvironment()
+ .Configuration.UseGenericStateMachine) {
+ // Retrieve the work function just to ensure we always call
+ // __kmpc_kernel_parallel even if a custom state machine is used.
+ // TODO: this is not super pretty. The problem is we create the call to
+ // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
+ // is not there yet. Thus, we assume we never reach it from
+ // __kmpc_target_deinit. That allows us to remove the store in there to
+ // ParallelRegionFn, which leads to bad results later on.
+ ParallelRegionFnTy WorkFn = nullptr;
+ __kmpc_kernel_parallel(&WorkFn);
+ ASSERT(WorkFn == nullptr, nullptr);
+ }
+}
+
+int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
+}
+
+#pragma omp end declare target