summaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
author Ross Brunton <ross@codeplay.com> 2025-06-25 14:53:18 +0100
committer GitHub <noreply@github.com> 2025-06-25 14:53:18 +0100
commit 0870c8838ba20edc0fb7efe60a0ee301bf01d209 (patch)
tree b52a4a2960798699ca906c13dcc06f0a145e650b /offload
parent e90ab0e34277cb3e4ef1e468518c30322f9d2c65 (diff)
[Offload] Add an `unloadBinary` interface to PluginInterface (#143873)
This allows removal of a specific Image from a Device, rather than requiring all image data to outlive the device it was created for. This is required for `ol_program_handle_t`s, which now specify the lifetime of the buffer used to create the program.
Diffstat (limited to 'offload')
-rw-r--r-- offload/liboffload/API/Program.td 4
-rw-r--r-- offload/liboffload/src/OffloadImpl.cpp 8
-rw-r--r-- offload/plugins-nextgen/amdgpu/src/rtl.cpp 20
-rw-r--r-- offload/plugins-nextgen/common/include/PluginInterface.h 12
-rw-r--r-- offload/plugins-nextgen/common/src/PluginInterface.cpp 72
-rw-r--r-- offload/plugins-nextgen/cuda/src/rtl.cpp 27
6 files changed, 77 insertions, 66 deletions
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 8c88fe6e21e6..0476fa1f7c27 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -13,7 +13,9 @@
def : Function {
let name = "olCreateProgram";
let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
- let details = [];
+ let details = [
+ "The provided `ProgData` will be copied and need not outlive the returned handle",
+ ];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index da2101529ffe..c2a35a245e2a 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -480,6 +480,14 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
}
Error olDestroyProgram_impl(ol_program_handle_t Program) {
+ auto &Device = Program->Image->getDevice();
+ if (auto Err = Device.unloadBinary(Program->Image))
+ return Err;
+
+ auto &LoadedImages = Device.LoadedImages;
+ LoadedImages.erase(
+ std::find(LoadedImages.begin(), LoadedImages.end(), Program->Image));
+
return olDestroy(Program);
}
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 73e1e66928fa..bc1a768feafd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2023,6 +2023,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ Error unloadBinaryImpl(DeviceImageTy *Image) override {
+ AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);
+
+ // Unload the executable of the image.
+ return AMDImage.unloadExecutable();
+ }
+
/// Deinitialize the device and release its resources.
Error deinitImpl() override {
// Deinitialize the stream and event pools.
@@ -2035,19 +2042,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = AMDGPUSignalManager.deinit())
return Err;
- // Close modules if necessary.
- if (!LoadedImages.empty()) {
- // Each image has its own module.
- for (DeviceImageTy *Image : LoadedImages) {
- AMDGPUDeviceImageTy &AMDImage =
- static_cast<AMDGPUDeviceImageTy &>(*Image);
-
- // Unload the executable of the image.
- if (auto Err = AMDImage.unloadExecutable())
- return Err;
- }
- }
-
// Invalidate agent reference.
Agent = {0};
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 91df80030437..fbc798faec24 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -752,6 +752,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Expected<DeviceImageTy *>
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
+ /// Unload a previously loaded Image from the device
+ Error unloadBinary(DeviceImageTy *Image);
+ virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
+
/// Setup the device environment if needed. Notice this setup may not be run
/// on some plugins. By default, it will be executed, but plugins can change
/// this behavior by overriding the shouldSetupDeviceEnvironment function.
@@ -1036,6 +1040,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
BoolEnvar OMPX_TrackAllocationTraces =
BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false);
+ /// Array of images loaded into the device. Images are automatically
+ /// deallocated by the allocator.
+ llvm::SmallVector<DeviceImageTy *> LoadedImages;
+
private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
@@ -1086,10 +1094,6 @@ protected:
UInt32Envar OMPX_InitialNumStreams;
UInt32Envar OMPX_InitialNumEvents;
- /// Array of images loaded into the device. Images are automatically
- /// deallocated by the allocator.
- llvm::SmallVector<DeviceImageTy *> LoadedImages;
-
/// The identifier of the device within the plugin. Notice this is not a
/// global device id and is not the device id visible to the OpenMP user.
const int32_t DeviceId;
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 3e9a62f57095..ac7031b6e881 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -821,26 +821,49 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
return Plugin::success();
}
-Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
- for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = callGlobalDestructors(Plugin, *Image))
- return Err;
+Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
+ if (auto Err = callGlobalDestructors(Plugin, *Image))
+ return Err;
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- for (auto *Image : LoadedImages) {
- DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &ImageDeviceMemoryPoolTracking);
- if (auto Err =
- GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
- consumeError(std::move(Err));
- continue;
- }
- DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+ DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
+ GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
+ sizeof(DeviceMemoryPoolTrackingTy),
+ &ImageDeviceMemoryPoolTracking);
+ if (auto Err =
+ GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
+ consumeError(std::move(Err));
}
+ DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+ }
+
+ GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
+ auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
+ if (!ProfOrErr)
+ return ProfOrErr.takeError();
+
+ if (!ProfOrErr->empty()) {
+ // Dump out profdata
+ if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
+ uint32_t(DeviceDebugKind::PGODump))
+ ProfOrErr->dump();
+
+ // Write data to profiling file
+ if (auto Err = ProfOrErr->write())
+ return Err;
+ }
+ return unloadBinaryImpl(Image);
+}
+
+Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
+ for (auto &I : LoadedImages)
+ if (auto Err = unloadBinary(I))
+ return Err;
+ LoadedImages.clear();
+
+ if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
// TODO: Write this by default into a file.
printf("\n\n|-----------------------\n"
"| Device memory tracker:\n"
@@ -856,25 +879,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
DeviceMemoryPoolTracking.AllocationMax);
}
- for (auto *Image : LoadedImages) {
- GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
- auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
- if (!ProfOrErr)
- return ProfOrErr.takeError();
-
- if (ProfOrErr->empty())
- continue;
-
- // Dump out profdata
- if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
- uint32_t(DeviceDebugKind::PGODump))
- ProfOrErr->dump();
-
- // Write data to profiling file
- if (auto Err = ProfOrErr->write())
- return Err;
- }
-
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 9943f533ef5a..0e662b038c36 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -358,6 +358,19 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ Error unloadBinaryImpl(DeviceImageTy *Image) override {
+ assert(Context && "Invalid CUDA context");
+
+ // Each image has its own module.
+ CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
+
+ // Unload the module of the image.
+ if (auto Err = CUDAImage.unloadModule())
+ return Err;
+
+ return Plugin::success();
+ }
+
/// Deinitialize the device and release its resources.
Error deinitImpl() override {
if (Context) {
@@ -372,20 +385,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (auto Err = CUDAEventManager.deinit())
return Err;
- // Close modules if necessary.
- if (!LoadedImages.empty()) {
- assert(Context && "Invalid CUDA context");
-
- // Each image has its own module.
- for (DeviceImageTy *Image : LoadedImages) {
- CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
-
- // Unload the module of the image.
- if (auto Err = CUDAImage.unloadModule())
- return Err;
- }
- }
-
if (Context) {
CUresult Res = cuDevicePrimaryCtxRelease(Device);
if (auto Err =