diff options
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src')
| -rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 29 |
1 files changed, 11 insertions, 18 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index c6dd954746e4..663cfdc5fdf0 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -3163,25 +3163,24 @@ struct AMDGPUPluginTy final : public GenericPluginTy { uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; } /// Check whether the image is compatible with an AMDGPU device. - Expected<bool> isELFCompatible(StringRef Image) const override { + Expected<bool> isELFCompatible(uint32_t DeviceId, + StringRef Image) const override { // Get the associated architecture and flags from the ELF. auto ElfOrErr = ELF64LEObjectFile::create( MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false); if (!ElfOrErr) return ElfOrErr.takeError(); std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName(); + if (!Processor) + return false; - for (hsa_agent_t Agent : KernelAgents) { - auto TargeTripleAndFeaturesOrError = - utils::getTargetTripleAndFeatures(Agent); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - if (!utils::isImageCompatibleWithEnv(Processor ? *Processor : "", + auto TargeTripleAndFeaturesOrError = + utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId)); + if (!TargeTripleAndFeaturesOrError) + return TargeTripleAndFeaturesOrError.takeError(); + return utils::isImageCompatibleWithEnv(Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), - *TargeTripleAndFeaturesOrError)) - return false; - } - return true; + *TargeTripleAndFeaturesOrError); } bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { @@ -3273,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, if (ArgsSize < KernelArgsSize) return Plugin::error("Mismatch of kernel arguments size"); - // The args size reported by HSA may or may not contain the implicit args. - // For now, assume that HSA does not consider the implicit arguments when - // reporting the arguments of a kernel. In the worst case, we can waste - // 56 bytes per allocation. - uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize; - AMDGPUPluginTy &AMDGPUPlugin = static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin); AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice(); AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager(); void *AllArgs = nullptr; - if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs)) + if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs)) return Err; // Account for user requested dynamic shared memory. |
