diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll | 99 |
1 files changed, 97 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll index a846607d816c..e8b43ad28ad2 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck -check-prefixes=CHECK,SM90 %s -; RUN: %if ptxas-12.9 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %} +; RUN: %if ptxas-sm_90 && ptxas-isa-8.7 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %} ; RUN: llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | FileCheck %s -check-prefixes=CHECK,SM100 -; RUN: %if ptxas-12.9 %{ llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | %ptxas-verify -arch=sm_100 %} +; RUN: %if ptxas-sm_100 && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | %ptxas-verify -arch=sm_100 %} ; This test is based on load-store-vectors.ll, ; and contains testing for lowering 256-bit vector loads/stores @@ -1506,3 +1506,98 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) { store volatile <4 x double> %a.load, ptr addrspace(5) %b ret void } + +define void @test_i256_global(ptr addrspace(1) %a, ptr addrspace(1) %b) { +; SM90-LABEL: test_i256_global( +; SM90: { +; SM90-NEXT: .reg .b64 %rd<7>; +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0]; +; SM90-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1]; +; SM90-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; SM90-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1]; +; SM90-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5}; +; SM90-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3}; +; SM90-NEXT: ret; +; +; SM100-LABEL: test_i256_global( +; SM100: { +; SM100-NEXT: .reg .b64 %rd<7>; +; SM100-EMPTY: +; SM100-NEXT: // %bb.0: +; SM100-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0]; +; SM100-NEXT: ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; SM100-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1]; +; SM100-NEXT: st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5}; +; SM100-NEXT: ret; + %a.load = load i256, ptr addrspace(1) %a, align 32 + store i256 %a.load, ptr addrspace(1) %b, align 32 + ret void +} + + +define void @test_i256_global_unaligned(ptr addrspace(1) %a, ptr addrspace(1) %b) { +; CHECK-LABEL: test_i256_global_unaligned( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_global_unaligned_param_0]; +; CHECK-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_global_unaligned_param_1]; +; CHECK-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3}; +; CHECK-NEXT: ret; + %a.load = load i256, ptr addrspace(1) %a, align 16 + store i256 %a.load, ptr addrspace(1) %b, align 16 + ret void +} + +define void @test_i256_generic(ptr %a, ptr %b) { +; CHECK-LABEL: test_i256_generic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_generic_param_0]; +; CHECK-NEXT: ld.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_generic_param_1]; +; CHECK-NEXT: st.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.v2.b64 [%rd6], {%rd2, %rd3}; +; CHECK-NEXT: ret; + %a.load = load i256, ptr %a, align 32 + store i256 %a.load, ptr %b, align 32 + ret void +} + +define void @test_i256_global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b) { +; SM90-LABEL: test_i256_global_volatile( +; SM90: { +; SM90-NEXT: .reg .b64 %rd<7>; +; SM90-EMPTY: +; SM90-NEXT: // %bb.0: +; SM90-NEXT: ld.param.b64 %rd1, [test_i256_global_volatile_param_0]; +; SM90-NEXT: ld.volatile.global.v2.b64 {%rd2, %rd3}, [%rd1]; +; SM90-NEXT: ld.volatile.global.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; SM90-NEXT: ld.param.b64 %rd6, [test_i256_global_volatile_param_1]; +; SM90-NEXT: st.volatile.global.v2.b64 [%rd6+16], {%rd4, %rd5}; +; SM90-NEXT: st.volatile.global.v2.b64 [%rd6], {%rd2, %rd3}; +; SM90-NEXT: ret; +; +; SM100-LABEL: test_i256_global_volatile( +; SM100: { +; SM100-NEXT: .reg .b64 %rd<7>; +; SM100-EMPTY: +; SM100-NEXT: // %bb.0: +; SM100-NEXT: ld.param.b64 %rd1, [test_i256_global_volatile_param_0]; +; SM100-NEXT: ld.volatile.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; SM100-NEXT: ld.param.b64 %rd6, [test_i256_global_volatile_param_1]; +; SM100-NEXT: st.volatile.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5}; +; SM100-NEXT: ret; + %a.load = load volatile i256, ptr addrspace(1) %a, align 32 + store volatile i256 %a.load, ptr addrspace(1) %b, align 32 + ret void +} |
