diff options
Diffstat (limited to 'offload/plugins-nextgen/cuda/src/rtl.cpp')
| -rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index bf335ab20f75..af3c74636bff 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1060,8 +1060,10 @@ struct CUDADeviceTy : public GenericDeviceTy { Info.add("CUDA OpenMP Device Number", DeviceId); Res = cuDeviceGetName(TmpChar, 1000, Device); - if (Res == CUDA_SUCCESS) + if (Res == CUDA_SUCCESS) { Info.add("Device Name", TmpChar, "", DeviceInfo::NAME); + Info.add("Product Name", TmpChar, "", DeviceInfo::PRODUCT_NAME); + } Info.add("Vendor Name", "NVIDIA", "", DeviceInfo::VENDOR); @@ -1118,7 +1120,13 @@ struct CUDADeviceTy : public GenericDeviceTy { if (Res == CUDA_SUCCESS) MaxBlock.add("z", TmpInt); - auto &MaxGrid = *Info.add("Maximum Grid Dimensions", ""); + // TODO: I assume CUDA devices have no limit on the amount of threads, + // verify this + Info.add("Maximum Grid Size", std::numeric_limits<uint32_t>::max(), "", + DeviceInfo::MAX_WORK_SIZE); + + auto &MaxGrid = *Info.add("Maximum Grid Dimensions", std::monostate{}, "", + DeviceInfo::MAX_WORK_SIZE_PER_DIMENSION); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt); if (Res == CUDA_SUCCESS) MaxGrid.add("x", TmpInt); @@ -1444,7 +1452,7 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice, Func, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, MaxDynCGroupMem); if (auto Err = Plugin::check( AttrResult, - "Error in cuLaunchKernel while setting the memory limits: %s")) + "error in cuFuncSetAttribute while setting the memory limits: %s")) return Err; MaxDynCGroupMemLimit = MaxDynCGroupMem; } |
