summaryrefslogtreecommitdiff
path: root/offload/plugins-nextgen
diff options
context:
space:
mode:
author: Rafal Bielski <rafal.bielski@codeplay.com> 2025-08-19 13:02:01 +0100
committer: GitHub <noreply@github.com> 2025-08-19 13:02:01 +0100
commit: 9c9d9e4cb6dfd8a3cada7fb6c8b4dc2b77b5514c (patch)
tree: 0c07d20f0f49b1b2ef60b96df3f6995b963b30c5 /offload/plugins-nextgen
parent: fcb36ca8ccd073c110cfc44b92f78562811f2ce9 (diff)
[Offload] Define additional device info properties (#152533)
Add the following properties in Offload device info:
* VENDOR_ID
* NUM_COMPUTE_UNITS
* [SINGLE|DOUBLE|HALF]_FP_CONFIG
* NATIVE_VECTOR_WIDTH_[CHAR|SHORT|INT|LONG|FLOAT|DOUBLE|HALF]
* MAX_CLOCK_FREQUENCY
* MEMORY_CLOCK_RATE
* ADDRESS_BITS
* MAX_MEM_ALLOC_SIZE
* GLOBAL_MEM_SIZE

Add a bitfield option to enumerators, allowing the values to be bit-shifted instead of incremented.

Generate the per-type enums using `foreach` to reduce code duplication.

Use macros in unit test definitions to reduce code duplication.
Diffstat (limited to 'offload/plugins-nextgen')
-rw-r--r-- offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h | 6
-rw-r--r-- offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h | 1
-rw-r--r-- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 25
-rw-r--r-- offload/plugins-nextgen/cuda/src/rtl.cpp | 17
4 files changed, 42 insertions, 7 deletions
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
index 61f680bab3a0..ad135f72fff1 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
@@ -71,9 +71,15 @@ typedef enum {
} hsa_isa_info_t;
typedef enum {
+ HSA_MACHINE_MODEL_SMALL = 0,
+ HSA_MACHINE_MODEL_LARGE = 1
+} hsa_machine_model_t;
+
+typedef enum {
HSA_AGENT_INFO_NAME = 0,
HSA_AGENT_INFO_VENDOR_NAME = 1,
HSA_AGENT_INFO_FEATURE = 2,
+ HSA_AGENT_INFO_MACHINE_MODEL = 3,
HSA_AGENT_INFO_PROFILE = 4,
HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 3117763e3589..29cfe78082db 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -67,6 +67,7 @@ typedef enum hsa_amd_agent_info_s {
HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
+ HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008,
HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 83280fe0a49c..3143fe408563 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2697,6 +2697,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Status == HSA_STATUS_SUCCESS)
Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR);
+ Info.add("Vendor ID", uint64_t{4130}, "", DeviceInfo::VENDOR_ID);
+
+ hsa_machine_model_t MachineModel;
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_MACHINE_MODEL, MachineModel);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Memory Address Size",
+ uint64_t{MachineModel == HSA_MACHINE_MODEL_SMALL ? 32u : 64u},
+ "bits", DeviceInfo::ADDRESS_BITS);
+
hsa_device_type_t DevType;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
if (Status == HSA_STATUS_SUCCESS) {
@@ -2747,11 +2756,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Max Clock Freq", TmpUInt, "MHz");
+ Info.add("Max Clock Freq", TmpUInt, "MHz",
+ DeviceInfo::MAX_CLOCK_FREQUENCY);
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Max Memory Clock Freq", TmpUInt, "MHz",
+ DeviceInfo::MEMORY_CLOCK_RATE);
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add("Compute Units", TmpUInt);
+ Info.add("Compute Units", TmpUInt, "", DeviceInfo::NUM_COMPUTE_UNITS);
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
@@ -2833,7 +2848,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- PoolNode.add("Size", TmpSt, "bytes");
+ PoolNode.add(
+ "Size", TmpSt, "bytes",
+ (Pool->isGlobal() && Pool->isCoarseGrained())
+ ? std::optional<DeviceInfo>{DeviceInfo::GLOBAL_MEM_SIZE}
+ : std::nullopt);
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index a99357a3adea..0a3720532e4e 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -979,13 +979,20 @@ struct CUDADeviceTy : public GenericDeviceTy {
Info.add("Vendor Name", "NVIDIA", "", DeviceInfo::VENDOR);
+ Info.add("Vendor ID", uint64_t{4318}, "", DeviceInfo::VENDOR_ID);
+
+ Info.add("Memory Address Size", std::numeric_limits<CUdeviceptr>::digits,
+ "bits", DeviceInfo::ADDRESS_BITS);
+
Res = cuDeviceTotalMem(&TmpSt, Device);
if (Res == CUDA_SUCCESS)
- Info.add("Global Memory Size", TmpSt, "bytes");
+ Info.add("Global Memory Size", TmpSt, "bytes",
+ DeviceInfo::GLOBAL_MEM_SIZE);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add("Number of Multiprocessors", TmpInt);
+ Info.add("Number of Multiprocessors", TmpInt, "",
+ DeviceInfo::NUM_COMPUTE_UNITS);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, TmpInt);
if (Res == CUDA_SUCCESS)
@@ -1046,7 +1053,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CLOCK_RATE, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add("Clock Rate", TmpInt, "kHz");
+ Info.add("Clock Rate", TmpInt / 1000, "MHz",
+ DeviceInfo::MAX_CLOCK_FREQUENCY);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, TmpInt);
if (Res == CUDA_SUCCESS)
@@ -1083,7 +1091,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add("Memory Clock Rate", TmpInt, "kHz");
+ Info.add("Memory Clock Rate", TmpInt / 1000, "MHz",
+ DeviceInfo::MEMORY_CLOCK_RATE);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, TmpInt);
if (Res == CUDA_SUCCESS)