summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll98
1 files changed, 50 insertions, 48 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll
index 1a73df341108..f74f9a8f2bdd 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll
@@ -10,28 +10,28 @@
; GCN: s_and_b32
; HSA-VI: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind {
+define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) #0 {
store i1 %x, ptr addrspace(1) %out, align 1
ret void
}
; FUNC-LABEL: {{^}}v3i8_arg:
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x8
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x0
; HSA-VI: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind {
+define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) #0 {
entry:
store <3 x i8> %in, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}i65_arg:
-; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x0
; HSA-VI: .amdhsa_kernarg_size 24
-define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) nounwind {
+define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) #0 {
entry:
store i65 %in, ptr addrspace(1) %out, align 4
ret void
@@ -39,7 +39,7 @@ entry:
; FUNC-LABEL: {{^}}empty_struct_arg:
; HSA-VI: .amdhsa_kernarg_size 0
-define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
+define amdgpu_kernel void @empty_struct_arg({} %in) #0 {
ret void
}
@@ -54,13 +54,13 @@ define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
; FIXME: Total argument size is computed wrong
; FUNC-LABEL: {{^}}struct_argument_alignment:
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x8
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x18
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x20
; HSA-VI: .amdhsa_kernarg_size 40
-define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
+define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) #0 {
%val0 = extractvalue {i32, i64} %arg0, 0
%val1 = extractvalue {i32, i64} %arg0, 1
%val2 = extractvalue {i32, i64} %arg1, 0
@@ -78,11 +78,11 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32,
; HSA-VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; HSA-VI: global_load_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:13
; HSA-VI: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:17
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x4
; HSA-VI: .amdhsa_kernarg_size 28
-define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
+define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) #0 {
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1
%val2 = extractvalue <{i32, i64}> %arg1, 0
@@ -95,14 +95,14 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
}
; GCN-LABEL: {{^}}struct_argument_alignment_after:
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
-; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
-; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x8
+; HSA-VI: s_load_dword s{{[0-9]+}}, s[8:9], 0x18
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x20
+; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x30
; HSA-VI: .amdhsa_kernarg_size 64
-define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) {
+define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) #0 {
%val0 = extractvalue {i32, i64} %arg0, 0
%val1 = extractvalue {i32, i64} %arg0, 1
%val2 = extractvalue {i32, i64} %arg2, 0
@@ -116,7 +116,7 @@ define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8,
}
; GCN-LABEL: {{^}}array_3xi32:
-; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x0
define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) {
store volatile i16 %arg0, ptr addrspace(1) undef
store volatile [3 x i32] %arg1, ptr addrspace(1) undef
@@ -124,7 +124,7 @@ define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) {
}
; GCN-LABEL: {{^}}array_3xi16:
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x0
define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) {
store volatile i8 %arg0, ptr addrspace(1) undef
store volatile [3 x i16] %arg1, ptr addrspace(1) undef
@@ -135,7 +135,7 @@ define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) {
; GCN: s_load_dword [[DWORD:s[0-9]+]]
; GCN-DAG: s_bfe_u32 [[BFE:s[0-9]+]], [[DWORD]], 0x100010{{$}}
; GCN-DAG: s_and_b32 [[AND:s[0-9]+]], [[DWORD]], 0x7fff{{$}}
-define amdgpu_kernel void @v2i15_arg(ptr addrspace(1) nocapture %out, <2 x i15> %in) {
+define amdgpu_kernel void @v2i15_arg(ptr addrspace(1) nocapture %out, <2 x i15> %in) #0 {
entry:
store <2 x i15> %in, ptr addrspace(1) %out, align 4
ret void
@@ -147,7 +147,7 @@ entry:
; GCN: s_and_b32
; GCN: s_and_b32
; GCN: s_or_b32
-define amdgpu_kernel void @v3i15_arg(ptr addrspace(1) nocapture %out, <3 x i15> %in) {
+define amdgpu_kernel void @v3i15_arg(ptr addrspace(1) nocapture %out, <3 x i15> %in) #0 {
entry:
store <3 x i15> %in, ptr addrspace(1) %out, align 4
ret void
@@ -156,9 +156,9 @@ entry:
; Byref pointers should only be treated as offsets from kernarg
; GCN-LABEL: {{^}}byref_constant_i8_arg:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GCN: global_load_ubyte v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8
+; GCN: global_load_ubyte v{{[0-9]+}}, [[ZERO]], s[8:9] offset:8
; GCN: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
+define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) #0 {
%in = load i8, ptr addrspace(4) %in.byref
%ext = zext i8 %in to i32
store i32 %ext, ptr addrspace(1) %out, align 4
@@ -167,9 +167,9 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out
; GCN-LABEL: {{^}}byref_constant_i16_arg:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GCN: global_load_ushort v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8
+; GCN: global_load_ushort v{{[0-9]+}}, [[ZERO]], s[8:9] offset:8
; GCN: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) {
+define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) #0 {
%in = load i16, ptr addrspace(4) %in.byref
%ext = zext i16 %in to i32
store i32 %ext, ptr addrspace(1) %out, align 4
@@ -177,9 +177,9 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou
}
; GCN-LABEL: {{^}}byref_constant_i32_arg:
-; GCN: s_load_dwordx4 [[LOAD:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
+; GCN: s_load_dwordx4 [[LOAD:s\[[0-9]+:[0-9]+\]]], s[8:9], 0x0{{$}}
; GCN: .amdhsa_kernarg_size 16
-define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) #0 {
%in = load i32, ptr addrspace(4) %in.byref
store volatile i32 %in, ptr addrspace(1) %out, align 4
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
@@ -187,10 +187,10 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou
}
; GCN-LABEL: {{^}}byref_constant_v4i32_arg:
-; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x20{{$}}
+; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x10{{$}}
+; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x20{{$}}
; GCN: .amdhsa_kernarg_size 36
-define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) #0 {
%in = load <4 x i32>, ptr addrspace(4) %in.byref
store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
@@ -198,13 +198,13 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %
}
; GCN-LABEL: {{^}}byref_align_constant_i32_arg:
-; GCN-DAG: s_load_dwordx2 s[[[IN:[0-9]+]]:[[AFTER_OFFSET:[0-9]+]]], s[4:5], 0x100{{$}}
+; GCN-DAG: s_load_dwordx2 s[[[IN:[0-9]+]]:[[AFTER_OFFSET:[0-9]+]]], s[8:9], 0x100{{$}}
; GCN-DAG: v_mov_b32_e32 [[V_IN:v[0-9]+]], s[[IN]]
; GCN-DAG: v_mov_b32_e32 [[V_AFTER_OFFSET:v[0-9]+]], s[[AFTER_OFFSET]]
; GCN: global_store_dword v{{[0-9]+}}, [[V_IN]], s
; GCN: global_store_dword v{{[0-9]+}}, [[V_AFTER_OFFSET]], s
; GCN: .amdhsa_kernarg_size 264
-define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) #0 {
%in = load i32, ptr addrspace(4) %in.byref
store volatile i32 %in, ptr addrspace(1) %out, align 4
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
@@ -212,10 +212,10 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu
}
; GCN-LABEL: {{^}}byref_natural_align_constant_v16i32_arg:
-; GCN-DAG: s_load_dword s{{[0-9]+}}, s[4:5], 0x80
-; GCN-DAG: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x40{{$}}
+; GCN-DAG: s_load_dword s{{[0-9]+}}, s[8:9], 0x80
+; GCN-DAG: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[8:9], 0x40{{$}}
; GCN: .amdhsa_kernarg_size 132
-define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) #0 {
%in = load <16 x i32>, ptr addrspace(4) %in.byref
store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
@@ -224,9 +224,9 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
; Also accept byref kernel arguments with other global address spaces.
; GCN-LABEL: {{^}}byref_global_i32_arg:
-; GCN: s_load_dword [[IN:s[0-9]+]], s[4:5], 0x8{{$}}
+; GCN: s_load_dword [[IN:s[0-9]+]], s[8:9], 0x8{{$}}
; GCN: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) {
+define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) #0 {
%in = load i32, ptr addrspace(1) %in.byref
store i32 %in, ptr addrspace(1) %out, align 4
ret void
@@ -234,17 +234,17 @@ define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out,
; GCN-LABEL: {{^}}byref_flat_i32_arg:
; GCN: flat_load_dword [[IN:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}} offset:8{{$}}
-define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) {
+define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) #0 {
%in = load i32, ptr %in.byref
store i32 %in, ptr addrspace(1) %out, align 4
ret void
}
; GCN-LABEL: {{^}}byref_constant_32bit_i32_arg:
-; GCN: s_add_i32 s[[PTR_LO:[0-9]+]], s4, 8
+; GCN: s_add_i32 s[[PTR_LO:[0-9]+]], s8, 8
; GCN: s_mov_b32 s[[PTR_HI:[0-9]+]], 0{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
-define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) {
+define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) #0 {
%in = load i32, ptr addrspace(6) %in.byref
store i32 %in, ptr addrspace(1) %out, align 4
ret void
@@ -257,9 +257,9 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu
; }
; GCN-LABEL: {{^}}multi_byref_constant_i32_arg:
-; GCN: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; GCN: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, s[8:9], 0x0
; GCN: .amdhsa_kernarg_size 20
-define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) {
+define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) #0 {
%in0 = load i32, ptr addrspace(4) %in0.byref
%in1 = load i32, ptr addrspace(4) %in1.byref
store volatile i32 %in0, ptr addrspace(1) %out, align 4
@@ -271,13 +271,15 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu
; GCN-LABEL: {{^}}byref_constant_i32_arg_offset0:
; GCN-NOT: s4
; GCN-NOT: s5
-; GCN: s_load_dword {{s[0-9]+}}, s[4:5], 0x0{{$}}
+; GCN: s_load_dword {{s[0-9]+}}, s[8:9], 0x0{{$}}
; GCN: .amdhsa_kernarg_size 4
-define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) {
+define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) #0 {
%in = load i32, ptr addrspace(4) %in.byref
store i32 %in, ptr addrspace(1) undef, align 4
ret void
}
+attributes #0 = { "amdgpu-no-implicitarg-ptr" }
+
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}