summaryrefslogtreecommitdiff
path: root/offload/plugins-nextgen
diff options
context:
space:
mode:
authorAbhinav Gaba <abhinav.gaba@intel.com>2025-08-15 11:49:35 -0700
committerGitHub <noreply@github.com>2025-08-15 11:49:35 -0700
commit79cf877627ec341c62f64e25a44f3ba340edad1e (patch)
treea29e2abb33f9b2b6ee2dc52643aba8a76b42ec1a /offload/plugins-nextgen
parent82caa251d4e145b54ea76236213617076f254c2b (diff)
[Offload] Introduce dataFence plugin interface. (#153793)
The purpose of this fence is to ensure that any `dataSubmit`s inserted into a queue before a `dataFence` finish before finish before any `dataSubmit`s inserted after it begin. This is a no-op for most queues, since they are in-order, and by design any operations inserted into them occur in order. But the interface is supposed to be functional for out-of-order queues. The addition of the interface means that any operations that rely on such ordering (like ATTACH map-type support in #149036) can invoke it, without worrying about whether the underlying queue is in-order or out-of-order. Once a plugin supports out-of-order queues, the plugin can implement this function, without requiring any change at the libomptarget level. --------- Co-authored-by: Alex Duran <alejandro.duran@intel.com>
Diffstat (limited to 'offload/plugins-nextgen')
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp7
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h8
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp12
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp7
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp7
5 files changed, 41 insertions, 0 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 536c662451df..83280fe0a49c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2576,6 +2576,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
getAgent(), (uint64_t)Size);
}
+ /// Insert a data fence between previous data operations and the following
+ /// operations. This is a no-op for AMDGPU devices as operations inserted into
+ /// a queue are in-order.
+ Error dataFence(__tgt_async_info *Async) override {
+ return Plugin::success();
+ }
+
/// Initialize the async info for interoperability purposes.
Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
// TODO: Implement this function.
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 5e32a1a76d96..a448721755a6 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -944,6 +944,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
+ /// Instert a data fence between previous data operations and the following
+ /// operations if necessary for the device
+ virtual Error dataFence(__tgt_async_info *AsyncInfo) = 0;
+
/// Exchange data between devices (device to device transfer). Calling this
/// function is only valid if GenericPlugin::isDataExchangable() passing the
/// two devices returns true.
@@ -1454,6 +1458,10 @@ public:
int DstDeviceId, void *DstPtr, int64_t Size,
__tgt_async_info *AsyncInfo);
+ /// Places a fence between previous data movements and following data
+ /// movements if necessary on the device
+ int32_t data_fence(int32_t DeviceId, __tgt_async_info *AsyncInfo);
+
/// Begin executing a kernel on the given device.
int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f177c5bc9f48..c06c35e1e6a5 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2333,3 +2333,15 @@ int32_t GenericPluginTy::async_barrier(omp_interop_val_t *Interop) {
}
return OFFLOAD_SUCCESS;
}
+
+int32_t GenericPluginTy::data_fence(int32_t DeviceId,
+ __tgt_async_info *AsyncInfo) {
+ auto Err = getDevice(DeviceId).dataFence(AsyncInfo);
+ if (Err) {
+ REPORT("failure to place data fence on device %d: %s\n", DeviceId,
+ toString(std::move(Err)).data());
+ return OFFLOAD_FAIL;
+ }
+
+ return OFFLOAD_SUCCESS;
+}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 155a580faee2..a99357a3adea 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -856,6 +856,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ /// Insert a data fence between previous data operations and the following
+ /// operations. This is a no-op for CUDA devices as operations inserted into
+ /// a queue are in-order.
+ Error dataFence(__tgt_async_info *Async) override {
+ return Plugin::success();
+ }
+
/// Initialize the device info for interoperability purposes.
Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override {
assert(Context && "Context is null");
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index f8ddc6713c01..25443fd1ac0b 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -295,6 +295,13 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
"dataExchangeImpl not supported");
}
+ /// Insert a data fence between previous data operations and the following
+ /// operations. This is a no-op for Host devices as operations inserted into
+ /// a queue are in-order.
+ Error dataFence(__tgt_async_info *Async) override {
+ return Plugin::success();
+ }
+
/// All functions are already synchronous. No need to do anything on this
/// synchronization function.
Error synchronizeImpl(__tgt_async_info &AsyncInfo,