diff options
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src')
| -rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 68 |
1 files changed, 62 insertions, 6 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 12c7cc62905c..f8db9bf0ae73 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1115,6 +1115,18 @@ private: return Plugin::success(); } + /// Complete pending post actions up to and including the event in the + /// target slot. + Error completeUntil(uint32_t TargetSlot) { + for (uint32_t Slot = 0; Slot <= TargetSlot; ++Slot) { + // Take the post action of the operation if any. + if (auto Err = Slots[Slot].performAction()) + return Err; + } + + return Plugin::success(); + } + /// Make the current stream wait on a specific operation of another stream. /// The idea is to make the current stream waiting on two signals: 1) the last /// signal of the current stream, and 2) the last signal of the other stream. @@ -1502,6 +1514,11 @@ public: return complete(); } + /// Synchronize the stream until the given event. The current thread waits + /// until the provided event is finalized, and it performs the pending post + /// actions for that and prior events. + Error synchronizeOn(AMDGPUEventTy &Event); + /// Query the stream and complete pending post actions if operations finished. /// Return whether all the operations completed. This operation does not block /// the calling thread. @@ -1575,6 +1592,21 @@ struct AMDGPUEventTy { return Stream.waitEvent(*this); } + Error sync() { + std::lock_guard<std::mutex> Lock(Mutex); + + if (!RecordedStream) + return Plugin::error(ErrorCode::INVALID_ARGUMENT, + "event does not have any recorded stream"); + + // No need to wait on anything: the recorded stream already finished the + // corresponding operation. + if (RecordedSlot < 0) + return Plugin::success(); + + return RecordedStream->synchronizeOn(*this); + } + protected: /// The stream registered in this event. 
AMDGPUStreamTy *RecordedStream; @@ -1630,6 +1662,27 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) { return waitOnStreamOperation(RecordedStream, Event.RecordedSlot); } +Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) { + std::lock_guard<std::mutex> Lock(Mutex); + + // If this event is from an older sync cycle, it has already been finalized. + if (Event.RecordedSyncCycle < SyncCycle) + return Plugin::success(); + assert(Event.RecordedSyncCycle == SyncCycle && "event is from the future?"); + + // Wait until the signal of the requested slot has completed. + if (auto Err = Slots[Event.RecordedSlot].Signal->wait( + StreamBusyWaitMicroseconds, &Device)) + return Err; + + // If the event is the last one in the stream, just do a full finalize. + if (Event.RecordedSlot == last()) + return complete(); + + // Otherwise, only finalize up to and including the event's slot. + return completeUntil(Event.RecordedSlot); +} + struct AMDGPUStreamManagerTy final : GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> { using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>; @@ -2540,8 +2593,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// Synchronize the current thread with the event. Error syncEventImpl(void *EventPtr) override { - return Plugin::error(ErrorCode::UNIMPLEMENTED, - "synchronize event not implemented"); + AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr); + return Event->sync(); } /// Print information about the device. @@ -2562,7 +2615,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor); if (Status == HSA_STATUS_SUCCESS && Status2 == HSA_STATUS_SUCCESS) Info.add("HSA Runtime Version", - std::to_string(Major) + "." + std::to_string(Minor)); + std::to_string(Major) + "." 
+ std::to_string(Minor), "", + DeviceInfo::DRIVER_VERSION); Info.add("HSA OpenMP Device Number", DeviceId); @@ -2572,11 +2626,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = getDeviceAttrRaw(HSA_AGENT_INFO_NAME, TmpChar); if (Status == HSA_STATUS_SUCCESS) - Info.add("Device Name", TmpChar); + Info.add("Device Name", TmpChar, "", DeviceInfo::NAME); Status = getDeviceAttrRaw(HSA_AGENT_INFO_VENDOR_NAME, TmpChar); if (Status == HSA_STATUS_SUCCESS) - Info.add("Vendor Name", TmpChar); + Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR); hsa_device_type_t DevType; Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType); @@ -2652,7 +2706,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim); if (Status == HSA_STATUS_SUCCESS) { - auto &MaxSize = *Info.add("Workgroup Max Size per Dimension"); + auto &MaxSize = + *Info.add("Workgroup Max Size per Dimension", std::monostate{}, "", + DeviceInfo::MAX_WORK_GROUP_SIZE); MaxSize.add("x", WorkgrpMaxDim[0]); MaxSize.add("y", WorkgrpMaxDim[1]); MaxSize.add("z", WorkgrpMaxDim[2]); |
