summary refs log tree commit diff
path: root/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll')
-rw-r--r-- llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll 99
1 files changed, 97 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
index a846607d816c..e8b43ad28ad2 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck -check-prefixes=CHECK,SM90 %s
-; RUN: %if ptxas-12.9 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.7 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | FileCheck %s -check-prefixes=CHECK,SM100
-; RUN: %if ptxas-12.9 %{ llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | %ptxas-verify -arch=sm_100 %}
+; RUN: %if ptxas-sm_100 && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx88 | %ptxas-verify -arch=sm_100 %}
; This test is based on load-store-vectors.ll,
; and contains testing for lowering 256-bit vector loads/stores
@@ -1506,3 +1506,98 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
store volatile <4 x double> %a.load, ptr addrspace(5) %b
ret void
}
+; align-32 i256 load/store in addrspace(1): SM90 checks expect two v2.b64 ld/st pairs, SM100 checks a single v4.b64 ld/st.
+define void @test_i256_global(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; SM90-LABEL: test_i256_global(
+; SM90: {
+; SM90-NEXT: .reg .b64 %rd<7>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM90-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; SM90-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; SM90-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM90-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; SM90-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; SM90-NEXT: ret;
+;
+; SM100-LABEL: test_i256_global(
+; SM100: {
+; SM100-NEXT: .reg .b64 %rd<7>;
+; SM100-EMPTY:
+; SM100-NEXT: // %bb.0:
+; SM100-NEXT: ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM100-NEXT: ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
+; SM100-NEXT: ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM100-NEXT: st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5};
+; SM100-NEXT: ret;
+ %a.load = load i256, ptr addrspace(1) %a, align 32
+ store i256 %a.load, ptr addrspace(1) %b, align 32
+ ret void
+}
+
+; Same i256 copy in addrspace(1) but with align 16: the shared CHECK prefix expects two v2.b64 ld/st pairs on both RUN configurations.
+define void @test_i256_global_unaligned(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; CHECK-LABEL: test_i256_global_unaligned(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_global_unaligned_param_0];
+; CHECK-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_global_unaligned_param_1];
+; CHECK-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: ret;
+ %a.load = load i256, ptr addrspace(1) %a, align 16
+ store i256 %a.load, ptr addrspace(1) %b, align 16
+ ret void
+}
+; align-32 i256 copy through plain `ptr` (no addrspace qualifier): the shared CHECK prefix expects two ld.v2.b64/st.v2.b64 pairs on both RUN configurations.
+define void @test_i256_generic(ptr %a, ptr %b) {
+; CHECK-LABEL: test_i256_generic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_i256_generic_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT: ld.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT: ld.param.b64 %rd6, [test_i256_generic_param_1];
+; CHECK-NEXT: st.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT: st.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT: ret;
+ %a.load = load i256, ptr %a, align 32
+ store i256 %a.load, ptr %b, align 32
+ ret void
+}
+; Volatile align-32 i256 copy in addrspace(1): SM90 checks expect two ld/st.volatile.global.v2.b64 pairs, SM100 checks a single v4.b64 ld/st.
+define void @test_i256_global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; SM90-LABEL: test_i256_global_volatile(
+; SM90: {
+; SM90-NEXT: .reg .b64 %rd<7>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b64 %rd1, [test_i256_global_volatile_param_0];
+; SM90-NEXT: ld.volatile.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; SM90-NEXT: ld.volatile.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; SM90-NEXT: ld.param.b64 %rd6, [test_i256_global_volatile_param_1];
+; SM90-NEXT: st.volatile.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; SM90-NEXT: st.volatile.global.v2.b64 [%rd6], {%rd2, %rd3};
+; SM90-NEXT: ret;
+;
+; SM100-LABEL: test_i256_global_volatile(
+; SM100: {
+; SM100-NEXT: .reg .b64 %rd<7>;
+; SM100-EMPTY:
+; SM100-NEXT: // %bb.0:
+; SM100-NEXT: ld.param.b64 %rd1, [test_i256_global_volatile_param_0];
+; SM100-NEXT: ld.volatile.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
+; SM100-NEXT: ld.param.b64 %rd6, [test_i256_global_volatile_param_1];
+; SM100-NEXT: st.volatile.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5};
+; SM100-NEXT: ret;
+ %a.load = load volatile i256, ptr addrspace(1) %a, align 32
+ store volatile i256 %a.load, ptr addrspace(1) %b, align 32
+ ret void
+}