summary refs log tree commit diff
path: root/offload/plugins-nextgen/amdgpu/src
diff options
context:
space:
mode:
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src')
-rw-r--r-- offload/plugins-nextgen/amdgpu/src/rtl.cpp 68
1 files changed, 62 insertions, 6 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c..f8db9bf0ae73 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1115,6 +1115,18 @@ private:
return Plugin::success();
}
+ /// Perform the pending post actions of every slot from slot 0 up to and
+ /// including the target slot. Returns the first error hit, else success.
+ Error completeUntil(uint32_t TargetSlot) {
+ for (uint32_t Slot = 0; Slot <= TargetSlot; ++Slot) {
+ // Run the post action of this slot's operation, if any; stop on error.
+ if (auto Err = Slots[Slot].performAction())
+ return Err;
+ }
+
+ return Plugin::success();
+ }
+
/// Make the current stream wait on a specific operation of another stream.
/// The idea is to make the current stream waiting on two signals: 1) the last
/// signal of the current stream, and 2) the last signal of the other stream.
@@ -1502,6 +1514,11 @@ public:
return complete();
}
+ /// Synchronize the stream up to the given event. The calling thread waits
+ /// until the operation recorded by the event has completed, then performs
+ /// the pending post actions of that operation and of the prior ones.
+ Error synchronizeOn(AMDGPUEventTy &Event);
+
/// Query the stream and complete pending post actions if operations finished.
/// Return whether all the operations completed. This operation does not block
/// the calling thread.
@@ -1575,6 +1592,21 @@ struct AMDGPUEventTy {
return Stream.waitEvent(*this);
}
+ Error sync() { // Make the calling thread wait for the recorded operation.
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ if (!RecordedStream) // Nothing was recorded: report an invalid argument.
+ return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "event does not have any recorded stream");
+
+ // A negative slot needs no wait: the recorded stream has already finished
+ // the corresponding operation.
+ if (RecordedSlot < 0)
+ return Plugin::success();
+
+ return RecordedStream->synchronizeOn(*this); // The stream does the wait.
+ }
+
protected:
/// The stream registered in this event.
AMDGPUStreamTy *RecordedStream;
@@ -1630,6 +1662,27 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) {
return waitOnStreamOperation(RecordedStream, Event.RecordedSlot);
}
+Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) {
+ std::lock_guard<std::mutex> Lock(Mutex); // Held across the wait below.
+
+ // An event recorded in an older sync cycle has already been finalized.
+ if (Event.RecordedSyncCycle < SyncCycle)
+ return Plugin::success();
+ assert(Event.RecordedSyncCycle == SyncCycle && "event is from the future?");
+
+ // Wait until the operation in the event's recorded slot has completed.
+ if (auto Err = Slots[Event.RecordedSlot].Signal->wait(
+ StreamBusyWaitMicroseconds, &Device))
+ return Err;
+
+ // If the event is the last one in the stream, just do a full finalize.
+ if (Event.RecordedSlot == last())
+ return complete();
+
+ // Otherwise, only finalize up to and including the event's slot.
+ return completeUntil(Event.RecordedSlot);
+}
+
struct AMDGPUStreamManagerTy final
: GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> {
using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>;
@@ -2540,8 +2593,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
- return Plugin::error(ErrorCode::UNIMPLEMENTED,
- "synchronize event not implemented");
+ AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
+ return Event->sync(); // NOTE(review): EventPtr is not null-checked here.
}
/// Print information about the device.
@@ -2562,7 +2615,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
if (Status == HSA_STATUS_SUCCESS && Status2 == HSA_STATUS_SUCCESS)
Info.add("HSA Runtime Version",
- std::to_string(Major) + "." + std::to_string(Minor));
+ std::to_string(Major) + "." + std::to_string(Minor), "",
+ DeviceInfo::DRIVER_VERSION);
Info.add("HSA OpenMP Device Number", DeviceId);
@@ -2572,11 +2626,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Device Name", TmpChar);
+ Info.add("Device Name", TmpChar, "", DeviceInfo::NAME);
Status = getDeviceAttrRaw(HSA_AGENT_INFO_VENDOR_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Vendor Name", TmpChar);
+ Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR);
hsa_device_type_t DevType;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
@@ -2652,7 +2706,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
+ auto &MaxSize =
+ *Info.add("Workgroup Max Size per Dimension", std::monostate{}, "",
+ DeviceInfo::MAX_WORK_GROUP_SIZE);
MaxSize.add("x", WorkgrpMaxDim[0]);
MaxSize.add("y", WorkgrpMaxDim[1]);
MaxSize.add("z", WorkgrpMaxDim[2]);