summaryrefslogtreecommitdiff
path: root/offload/plugins-nextgen/amdgpu/src/rtl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src/rtl.cpp')
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp29
1 files changed, 11 insertions, 18 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c6dd954746e4..663cfdc5fdf0 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3163,25 +3163,24 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
/// Check whether the image is compatible with an AMDGPU device.
- Expected<bool> isELFCompatible(StringRef Image) const override {
+ Expected<bool> isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const override {
// Get the associated architecture and flags from the ELF.
auto ElfOrErr = ELF64LEObjectFile::create(
MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false);
if (!ElfOrErr)
return ElfOrErr.takeError();
std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName();
+ if (!Processor)
+ return false;
- for (hsa_agent_t Agent : KernelAgents) {
- auto TargeTripleAndFeaturesOrError =
- utils::getTargetTripleAndFeatures(Agent);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (!utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
+ auto TargeTripleAndFeaturesOrError =
+ utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
+ if (!TargeTripleAndFeaturesOrError)
+ return TargeTripleAndFeaturesOrError.takeError();
+ return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError))
- return false;
- }
- return true;
+ *TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
@@ -3273,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (ArgsSize < KernelArgsSize)
return Plugin::error("Mismatch of kernel arguments size");
- // The args size reported by HSA may or may not contain the implicit args.
- // For now, assume that HSA does not consider the implicit arguments when
- // reporting the arguments of a kernel. In the worst case, we can waste
- // 56 bytes per allocation.
- uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
-
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
- if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs))
+ if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
// Account for user requested dynamic shared memory.