diff options
| author | Joseph Huber <huberjn@outlook.com> | 2025-07-21 14:38:03 -0500 |
|---|---|---|
| committer | Tom Stellard <tstellar@redhat.com> | 2025-09-22 17:55:39 -0700 |
| commit | e625a781211ea61833d8249967fcb10b3b5cba36 (patch) | |
| tree | 39455c0ae773c094ef9c81a9ace5a33604e62167 /offload | |
| parent | 3e93017936b522cd0821e38c3ad68231028e00a0 (diff) | |
[LLVM] Update CUDA ELF flags for their new ABI (#149534)
Summary:
We rely on these flags to do things in the runtime and print the
contents of binaries correctly. CUDA updated their ABI encoding recently
and we didn't handle that. It's a new ABI entirely, so we just select on
it when it shows up.
Fixes: https://github.com/llvm/llvm-project/issues/148703
[LLVM] Fix offload and update CUDA ABI for all SM values (#159354)
Summary:
Turns out the new CUDA ABI now applies retroactively to all the other
SMs if you upgrade to CUDA 13.0. This patch changes the scheme, keeping
all the SM flags consistent but using an offset.
Fixes: https://github.com/llvm/llvm-project/issues/159088
Diffstat (limited to 'offload')
| -rw-r--r-- | offload/plugins-nextgen/common/src/Utils/ELF.cpp | 23 | ||||
| -rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 6 |
2 files changed, 20 insertions, 9 deletions
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index dfec55432f20..b0ee1984c42c 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -60,23 +60,30 @@ static Expected<bool> checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) { const auto Header = ELFObj.getELFFile().getHeader(); if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN) - return createError("Only executable ELF files are supported"); + return createError("only executable ELF files are supported"); if (Header.e_machine == EM_AMDGPU) { if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) - return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); + return createError("invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) - return createError("Invalid AMD ABI version, must be version 5 or above"); + return createError("invalid AMD ABI version, must be version 5 or above"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) - return createError("Unsupported AMDGPU architecture"); + return createError("unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { - if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) - return createError("Invalid CUDA addressing mode"); - if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) - return createError("Unsupported NVPTX architecture"); + if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) { + if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) + return createError("invalid CUDA addressing mode"); + if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) + return createError("unsupported NVPTX architecture"); + } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) { + if 
((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100) + return createError("unsupported NVPTX architecture"); + } else { + return createError("invalid CUDA ABI version"); + } } return Header.e_machine == EMachine; diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index b787376eb177..71a28fadfd81 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy { return ElfOrErr.takeError(); // Get the numeric value for the image's `sm_` value. - auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM; + const auto Header = ElfOrErr->getELFFile().getHeader(); + unsigned SM = + Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 + ? Header.e_flags & ELF::EF_CUDA_SM + : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET; CUdevice Device; CUresult Res = cuDeviceGet(&Device, DeviceId); |
