diff options
| author | Rafal Bielski <rafal.bielski@codeplay.com> | 2025-08-19 13:02:01 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-19 13:02:01 +0100 |
| commit | 9c9d9e4cb6dfd8a3cada7fb6c8b4dc2b77b5514c (patch) | |
| tree | 0c07d20f0f49b1b2ef60b96df3f6995b963b30c5 /offload/plugins-nextgen | |
| parent | fcb36ca8ccd073c110cfc44b92f78562811f2ce9 (diff) | |
[Offload] Define additional device info properties (#152533)
Add the following properties in Offload device info:
* VENDOR_ID
* NUM_COMPUTE_UNITS
* [SINGLE|DOUBLE|HALF]_FP_CONFIG
* NATIVE_VECTOR_WIDTH_[CHAR|SHORT|INT|LONG|FLOAT|DOUBLE|HALF]
* MAX_CLOCK_FREQUENCY
* MEMORY_CLOCK_RATE
* ADDRESS_BITS
* MAX_MEM_ALLOC_SIZE
* GLOBAL_MEM_SIZE
Add a bitfield option to enumerators, allowing the values to be
bit-shifted instead of incremented. Generate the per-type enums using
`foreach` to reduce code duplication.
Use macros in unit test definitions to reduce code duplication.
Diffstat (limited to 'offload/plugins-nextgen')
| -rw-r--r-- | offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h | 6 | ||||
| -rw-r--r-- | offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h | 1 | ||||
| -rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 25 | ||||
| -rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 17 |
4 files changed, 42 insertions, 7 deletions
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h index 61f680bab3a0..ad135f72fff1 100644 --- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h +++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h @@ -71,9 +71,15 @@ typedef enum { } hsa_isa_info_t; typedef enum { + HSA_MACHINE_MODEL_SMALL = 0, + HSA_MACHINE_MODEL_LARGE = 1 +} hsa_machine_model_t; + +typedef enum { HSA_AGENT_INFO_NAME = 0, HSA_AGENT_INFO_VENDOR_NAME = 1, HSA_AGENT_INFO_FEATURE = 2, + HSA_AGENT_INFO_MACHINE_MODEL = 3, HSA_AGENT_INFO_PROFILE = 4, HSA_AGENT_INFO_WAVEFRONT_SIZE = 6, HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7, diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h index 3117763e3589..29cfe78082db 100644 --- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h +++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h @@ -67,6 +67,7 @@ typedef enum hsa_amd_agent_info_s { HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002, HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003, + HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008, HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009, HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A, HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B, diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 83280fe0a49c..3143fe408563 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2697,6 +2697,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (Status == HSA_STATUS_SUCCESS) Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR); + Info.add("Vendor ID", uint64_t{4130}, "", DeviceInfo::VENDOR_ID); + + hsa_machine_model_t MachineModel; + Status = getDeviceAttrRaw(HSA_AGENT_INFO_MACHINE_MODEL, MachineModel); + if (Status == HSA_STATUS_SUCCESS) + Info.add("Memory Address Size", + uint64_t{MachineModel == HSA_MACHINE_MODEL_SMALL ? 32u : 64u}, + "bits", DeviceInfo::ADDRESS_BITS); + hsa_device_type_t DevType; Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType); if (Status == HSA_STATUS_SUCCESS) { @@ -2747,11 +2756,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt); if (Status == HSA_STATUS_SUCCESS) - Info.add("Max Clock Freq", TmpUInt, "MHz"); + Info.add("Max Clock Freq", TmpUInt, "MHz", + DeviceInfo::MAX_CLOCK_FREQUENCY); + + Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY, TmpUInt); + if (Status == HSA_STATUS_SUCCESS) + Info.add("Max Memory Clock Freq", TmpUInt, "MHz", + DeviceInfo::MEMORY_CLOCK_RATE); Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, TmpUInt); if (Status == HSA_STATUS_SUCCESS) - Info.add("Compute Units", TmpUInt); + Info.add("Compute Units", TmpUInt, "", DeviceInfo::NUM_COMPUTE_UNITS); Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, TmpUInt); if (Status == HSA_STATUS_SUCCESS) @@ -2833,7 +2848,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt); if (Status == HSA_STATUS_SUCCESS) - PoolNode.add("Size", TmpSt, "bytes"); + PoolNode.add( + "Size", TmpSt, "bytes", + (Pool->isGlobal() && Pool->isCoarseGrained()) + ? std::optional<DeviceInfo>{DeviceInfo::GLOBAL_MEM_SIZE} + : std::nullopt); Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, TmpBool); diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index a99357a3adea..0a3720532e4e 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -979,13 +979,20 @@ struct CUDADeviceTy : public GenericDeviceTy { Info.add("Vendor Name", "NVIDIA", "", DeviceInfo::VENDOR); + Info.add("Vendor ID", uint64_t{4318}, "", DeviceInfo::VENDOR_ID); + + Info.add("Memory Address Size", std::numeric_limits<CUdeviceptr>::digits, + "bits", DeviceInfo::ADDRESS_BITS); + Res = cuDeviceTotalMem(&TmpSt, Device); if (Res == CUDA_SUCCESS) - Info.add("Global Memory Size", TmpSt, "bytes"); + Info.add("Global Memory Size", TmpSt, "bytes", + DeviceInfo::GLOBAL_MEM_SIZE); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("Number of Multiprocessors", TmpInt); + Info.add("Number of Multiprocessors", TmpInt, "", + DeviceInfo::NUM_COMPUTE_UNITS); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, TmpInt); if (Res == CUDA_SUCCESS) @@ -1046,7 +1053,8 @@ struct CUDADeviceTy : public GenericDeviceTy { Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CLOCK_RATE, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("Clock Rate", TmpInt, "kHz"); + Info.add("Clock Rate", TmpInt / 1000, "MHz", + DeviceInfo::MAX_CLOCK_FREQUENCY); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, TmpInt); if (Res == CUDA_SUCCESS) @@ -1083,7 +1091,8 @@ struct CUDADeviceTy : public GenericDeviceTy { Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("Memory Clock Rate", TmpInt, "kHz"); + Info.add("Memory Clock Rate", TmpInt / 1000, "MHz", + DeviceInfo::MEMORY_CLOCK_RATE); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, TmpInt); if (Res == CUDA_SUCCESS) |
