diff options
Diffstat (limited to 'offload/plugins-nextgen/common/include/PluginInterface.h')
| -rw-r--r-- | offload/plugins-nextgen/common/include/PluginInterface.h | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 88423be039af..973add0ba100 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -19,6 +19,7 @@ #include <shared_mutex> #include <vector> +#include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" #include "Shared/EnvironmentVar.h" @@ -265,7 +266,7 @@ struct GenericKernelTy { AsyncInfoWrapperTy &AsyncInfoWrapper) const; virtual Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads, uint64_t NumBlocks, KernelArgsTy &KernelArgs, - void *Args, + KernelLaunchParamsTy LaunchParams, AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0; /// Get the kernel name. @@ -326,11 +327,12 @@ protected: private: /// Prepare the arguments before launching the kernel. - void *prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs, - ptrdiff_t *ArgOffsets, uint32_t &NumArgs, - llvm::SmallVectorImpl<void *> &Args, - llvm::SmallVectorImpl<void *> &Ptrs, - KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const; + KernelLaunchParamsTy + prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs, + ptrdiff_t *ArgOffsets, uint32_t &NumArgs, + llvm::SmallVectorImpl<void *> &Args, + llvm::SmallVectorImpl<void *> &Ptrs, + KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const; /// Get the number of threads and blocks for the kernel based on the /// user-defined threads and block clauses. @@ -824,6 +826,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy { return OMPX_MinThreadsForLowTripCount; } + /// Whether or not to reuse blocks for high trip count loops. + /// @see OMPX_ReuseBlocksForHighTripCount + bool getReuseBlocksForHighTripCount() { + return OMPX_ReuseBlocksForHighTripCount; + } + /// Get the total amount of hardware parallelism supported by the target /// device. This is the total amount of warps or wavefronts that can be /// resident on the device simultaneously. @@ -899,6 +907,9 @@ private: UInt32Envar OMPX_MinThreadsForLowTripCount = UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32); + BoolEnvar OMPX_ReuseBlocksForHighTripCount = + BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true); + protected: /// Environment variables defined by the LLVM OpenMP implementation /// regarding the initial number of streams and events. |
