summaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
authorJoseph Huber <huberjn@outlook.com>2025-10-06 12:49:44 -0500
committerGitHub <noreply@github.com>2025-10-06 12:49:44 -0500
commit8763812b4c60a702094d03c45d3a9db4478ca331 (patch)
tree42c973cd5a1301a3685407c5fc45d3447a179f63 /offload
parent839b91c2294b4aeb5598309f90afa241ace5acef (diff)
[Offload] Remove check on kernel argument sizes (#162121)
Summary: This check is unnecessarily restrictive and currently incorrectly fires for any size less than eight bytes. Just remove it, we do sanity checks elsewhere and at some point need to trust the ABI.
Diffstat (limited to 'offload')
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp5
-rw-r--r--offload/unittests/OffloadAPI/device_code/CMakeLists.txt2
-rw-r--r--offload/unittests/OffloadAPI/device_code/byte.cpp3
-rw-r--r--offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp1
4 files changed, 6 insertions, 5 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index f73fa0475a3a..8d2f9755e035 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3687,11 +3687,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
- if (ArgsSize != LaunchParams.Size &&
- ArgsSize > LaunchParams.Size + getImplicitArgsSize())
- return Plugin::error(ErrorCode::INVALID_ARGUMENT,
- "invalid kernel arguments size");
-
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 50e430597e64..1a042e1b3831 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -2,6 +2,7 @@ add_offload_test_device_code(foo.cpp foo)
add_offload_test_device_code(bar.cpp bar)
# Compile with optimizations to eliminate AMDGPU implicit arguments.
add_offload_test_device_code(noargs.cpp noargs -O3)
+add_offload_test_device_code(byte.cpp byte)
add_offload_test_device_code(localmem.cpp localmem)
add_offload_test_device_code(localmem_reduction.cpp localmem_reduction)
add_offload_test_device_code(localmem_static.cpp localmem_static)
@@ -14,6 +15,7 @@ add_custom_target(offload_device_binaries DEPENDS
foo.bin
bar.bin
noargs.bin
+ byte.bin
localmem.bin
localmem_reduction.bin
localmem_static.bin
diff --git a/offload/unittests/OffloadAPI/device_code/byte.cpp b/offload/unittests/OffloadAPI/device_code/byte.cpp
new file mode 100644
index 000000000000..779d120fefca
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/byte.cpp
@@ -0,0 +1,3 @@
+#include <gpuintrin.h>
+
+extern "C" __gpu_kernel void byte(unsigned char c) { (void)c; }
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 1dac8c50271b..c9eca36a4d44 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -55,6 +55,7 @@ struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
KERNEL_TEST(Foo, foo)
KERNEL_TEST(NoArgs, noargs)
+KERNEL_TEST(Byte, byte)
KERNEL_TEST(LocalMem, localmem)
KERNEL_TEST(LocalMemReduction, localmem_reduction)
KERNEL_TEST(LocalMemStatic, localmem_static)