diff options
Diffstat (limited to 'offload/plugins-nextgen/common')
| -rw-r--r-- | offload/plugins-nextgen/common/include/PluginInterface.h | 23 | ||||
| -rw-r--r-- | offload/plugins-nextgen/common/src/PluginInterface.cpp | 111 |
2 files changed, 25 insertions, 109 deletions
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 75f87cab6049..6ff3ef8cda17 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -417,6 +417,7 @@ struct GenericKernelTy { case OMP_TGT_EXEC_MODE_SPMD: case OMP_TGT_EXEC_MODE_GENERIC: case OMP_TGT_EXEC_MODE_GENERIC_SPMD: + case OMP_TGT_EXEC_MODE_SPMD_NO_LOOP: return true; } return false; @@ -434,6 +435,8 @@ protected: return "Generic"; case OMP_TGT_EXEC_MODE_GENERIC_SPMD: return "Generic-SPMD"; + case OMP_TGT_EXEC_MODE_SPMD_NO_LOOP: + return "SPMD-No-Loop"; } llvm_unreachable("Unknown execution mode!"); } @@ -471,7 +474,8 @@ private: uint32_t BlockLimitClause[3], uint64_t LoopTripCount, uint32_t &NumThreads, bool IsNumThreadsFromUser) const; - /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode. + /// Indicate if the kernel works in Generic SPMD, Generic, No-Loop + /// or SPMD mode. bool isGenericSPMDMode() const { return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_GENERIC_SPMD; @@ -486,6 +490,10 @@ private: bool isBareMode() const { return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE; } + bool isNoLoopMode() const { + return KernelEnvironment.Configuration.ExecMode == + OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; + } /// The kernel name. std::string Name; @@ -831,11 +839,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error unloadBinary(DeviceImageTy *Image); virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0; - /// Setup the device environment if needed. Notice this setup may not be run - /// on some plugins. By default, it will be executed, but plugins can change - /// this behavior by overriding the shouldSetupDeviceEnvironment function. - Error setupDeviceEnvironment(GenericPluginTy &Plugin, DeviceImageTy &Image); - /// Setup the global device memory pool, if the plugin requires one. Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image, uint64_t PoolSize); @@ -1035,6 +1038,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy { uint32_t getDefaultNumBlocks() const { return GridValues.GV_Default_Num_Teams; } + uint32_t getDebugKind() const { return OMPX_DebugKind; } uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; } virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC; } @@ -1175,11 +1179,6 @@ private: virtual Error getDeviceHeapSize(uint64_t &V) = 0; virtual Error setDeviceHeapSize(uint64_t V) = 0; - /// Indicate whether the device should setup the device environment. Notice - /// that returning false in this function will change the behavior of the - /// setupDeviceEnvironment() function. - virtual bool shouldSetupDeviceEnvironment() const { return true; } - /// Indicate whether the device should setup the global device memory pool. If /// false is return the value on the device will be uninitialized. virtual bool shouldSetupDeviceMemoryPool() const { return true; } @@ -1235,7 +1234,7 @@ protected: enum class PeerAccessState : uint8_t { AVAILABLE, UNAVAILABLE, PENDING }; /// Array of peer access states with the rest of devices. This means that if - /// the device I has a matrix PeerAccesses with PeerAccesses[J] == AVAILABLE, + /// the device I has a matrix PeerAccesses with PeerAccesses == AVAILABLE, /// the device I can access device J's memory directly. However, notice this /// does not mean that device J can access device I's memory directly. llvm::SmallVector<PeerAccessState> PeerAccesses; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index d4b5f914c667..36cdd6035e26 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -371,54 +371,6 @@ public: }; } // namespace llvm::omp::target::plugin -// Extract the mapping of host function pointers to device function pointers -// from the entry table. Functions marked as 'indirect' in OpenMP will have -// offloading entries generated for them which map the host's function pointer -// to a global containing the corresponding function pointer on the device. -static Expected<std::pair<void *, uint64_t>> -setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device, - DeviceImageTy &Image) { - GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); - - llvm::ArrayRef<llvm::offloading::EntryTy> Entries( - Image.getTgtImage()->EntriesBegin, Image.getTgtImage()->EntriesEnd); - llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable; - for (const auto &Entry : Entries) { - if (Entry.Kind != object::OffloadKind::OFK_OpenMP || Entry.Size == 0 || - !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT)) - continue; - - assert(Entry.Size == sizeof(void *) && "Global not a function pointer?"); - auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); - - GlobalTy DeviceGlobal(Entry.SymbolName, Entry.Size); - if (auto Err = - Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) - return std::move(Err); - - HstPtr = Entry.Address; - if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(), - Entry.Size, nullptr)) - return std::move(Err); - } - - // If we do not have any indirect globals we exit early. - if (IndirectCallTable.empty()) - return std::pair{nullptr, 0}; - - // Sort the array to allow for more efficient lookup of device pointers. - llvm::sort(IndirectCallTable, - [](const auto &x, const auto &y) { return x.first < y.first; }); - - uint64_t TableSize = - IndirectCallTable.size() * sizeof(std::pair<void *, void *>); - void *DevicePtr = Device.allocate(TableSize, nullptr, TARGET_ALLOC_DEVICE); - if (auto Err = Device.dataSubmit(DevicePtr, IndirectCallTable.data(), - TableSize, nullptr)) - return std::move(Err); - return std::pair<void *, uint64_t>(DevicePtr, IndirectCallTable.size()); -} - AsyncInfoWrapperTy::AsyncInfoWrapperTy(GenericDeviceTy &Device, __tgt_async_info *AsyncInfoPtr) : Device(Device), @@ -662,6 +614,10 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice, return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit()); } + // Return the number of teams required to cover the loop iterations. + if (isNoLoopMode()) + return LoopTripCount > 0 ? (((LoopTripCount - 1) / NumThreads) + 1) : 1; + uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks(); uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max(); if (LoopTripCount > 0) { @@ -939,10 +895,6 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, // Add the image to list. LoadedImages.push_back(Image); - // Setup the device environment if needed. - if (auto Err = setupDeviceEnvironment(Plugin, *Image)) - return std::move(Err); - // Setup the global device memory pool if needed. if (!Plugin.getRecordReplay().isReplaying() && shouldSetupDeviceMemoryPool()) { @@ -978,43 +930,6 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, return Image; } -Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin, - DeviceImageTy &Image) { - // There are some plugins that do not need this step. - if (!shouldSetupDeviceEnvironment()) - return Plugin::success(); - - // Obtain a table mapping host function pointers to device function pointers. - auto CallTablePairOrErr = setupIndirectCallTable(Plugin, *this, Image); - if (!CallTablePairOrErr) - return CallTablePairOrErr.takeError(); - - DeviceEnvironmentTy DeviceEnvironment; - DeviceEnvironment.DeviceDebugKind = OMPX_DebugKind; - DeviceEnvironment.NumDevices = Plugin.getNumDevices(); - // TODO: The device ID used here is not the real device ID used by OpenMP. - DeviceEnvironment.DeviceNum = DeviceId; - DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize; - DeviceEnvironment.ClockFrequency = getClockFrequency(); - DeviceEnvironment.IndirectCallTable = - reinterpret_cast<uintptr_t>(CallTablePairOrErr->first); - DeviceEnvironment.IndirectCallTableSize = CallTablePairOrErr->second; - DeviceEnvironment.HardwareParallelism = getHardwareParallelism(); - - // Create the metainfo of the device environment global. - GlobalTy DevEnvGlobal("__omp_rtl_device_environment", - sizeof(DeviceEnvironmentTy), &DeviceEnvironment); - - // Write device environment values to the device. - GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); - if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) { - DP("Missing symbol %s, continue execution anyway.\n", - DevEnvGlobal.getName().data()); - consumeError(std::move(Err)); - } - return Plugin::success(); -} - Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image, uint64_t PoolSize) { @@ -1337,16 +1252,19 @@ Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) { Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo, bool ReleaseQueue) { + if (!AsyncInfo) + return Plugin::error(ErrorCode::INVALID_ARGUMENT, + "invalid async info queue"); + SmallVector<void *> AllocsToDelete{}; { std::lock_guard<std::mutex> AllocationGuard{AsyncInfo->Mutex}; - if (!AsyncInfo || !AsyncInfo->Queue) - return Plugin::error(ErrorCode::INVALID_ARGUMENT, - "invalid async info queue"); - - if (auto Err = synchronizeImpl(*AsyncInfo, ReleaseQueue)) - return Err; + // This can be false when no work has been added to the AsyncInfo. In which + // case, the device has nothing to synchronize. + if (AsyncInfo->Queue) + if (auto Err = synchronizeImpl(*AsyncInfo, ReleaseQueue)) + return Err; std::swap(AllocsToDelete, AsyncInfo->AssociatedAllocations); } @@ -2252,8 +2170,7 @@ int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, GenericGlobalHandlerTy &GHandler = getGlobalHandler(); if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) { - REPORT("Failure to look up global address: %s\n", - toString(std::move(Err)).data()); + consumeError(std::move(Err)); return OFFLOAD_FAIL; } |
