diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/atomics-b128.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/atomics-b128.ll | 1033 |
1 files changed, 1033 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll new file mode 100644 index 000000000000..b2a3f94d11a1 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll @@ -0,0 +1,1033 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: not llc < %s -mcpu=sm_90 -mattr=+ptx82 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc < %s -mcpu=sm_80 -mattr=+ptx84 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx84 | FileCheck %s --check-prefix=CHECK +; RUN: %if ptxas-sm_90 && ptxas-isa-8.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %} + +;; TODO: Update cmpxchg.py so that it can automatically generate the IR for +;; these test cases. + +target triple = "nvptx64-nvidia-cuda" + +;; Check that the first couple of error messages are correct. +; ERROR: error: unsupported cmpxchg +; ERROR: error: unsupported cmpxchg + +define i128 @test_xchg_generic(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_generic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_generic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_generic_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_global(ptr addrspace(1) %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_global( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_global_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_global_param_1]; +; CHECK-NEXT: 
{ +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.global.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr addrspace(1) %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_shared(ptr addrspace(3) %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_shared( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.shared.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr addrspace(3) %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_shared_cluster(ptr addrspace(7) %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_shared_cluster( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_cluster_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_cluster_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.shared::cluster.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr addrspace(7) %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_block(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_block( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; 
CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_block_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_block_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.cta.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("block") release + ret i128 %old +} + +define i128 @test_xchg_cluster(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_cluster( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_cluster_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_cluster_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.cluster.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("cluster") release + ret i128 %old +} + +define i128 @test_xchg_gpu(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_gpu( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_gpu_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_gpu_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.gpu.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("device") release + ret i128 %old +} + +define i128 @test_xchg_sys(ptr %addr, i128 %amt) { +; 
CHECK-LABEL: test_xchg_sys( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_sys_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_sys_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_relaxed(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_relaxed( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_relaxed_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_relaxed_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.relaxed.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt monotonic + ret i128 %old +} + +define i128 @test_xchg_acquire(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acquire_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acquire_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.acquire.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt acquire + ret i128 %old +} + +define i128 
@test_xchg_release(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_release( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_release_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_release_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt release + ret i128 %old +} + +define i128 @test_xchg_acq_rel(ptr %addr, i128 %amt) { +; CHECK-LABEL: test_xchg_acq_rel( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acq_rel_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acq_rel_param_1]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3}; +; CHECK-NEXT: atom.acq_rel.sys.exch.b128 dst, [%rd1], amt; +; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %old = atomicrmw xchg ptr %addr, i128 %amt acq_rel + ret i128 %old +} + +define i128 @test_cmpxchg_generic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_generic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_generic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_generic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_generic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: 
mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_global(ptr addrspace(1) %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_global( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_global_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_global_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_global_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.sys.global.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr addrspace(1) %addr, i128 %cmp, i128 %new monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_shared(ptr addrspace(3) %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_shared( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.sys.shared.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr addrspace(3) %addr, i128 %cmp, i128 %new monotonic monotonic + ret i128 %new +} + 
+define i128 @test_cmpxchg_block(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_block( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_block_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_block_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_block_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.cta.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("block") monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_cluster(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_cluster( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_cluster_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_cluster_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_cluster_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.cluster.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("cluster") monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_gpu(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_gpu( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, 
[test_cmpxchg_gpu_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_gpu_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_gpu_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.gpu.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("device") monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_shared_cluster(ptr addrspace(7) %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_shared_cluster( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_cluster_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_cluster_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_cluster_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.sys.shared::cluster.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr addrspace(7) %addr, i128 %cmp, i128 %new monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_monotonic_monotonic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_monotonic_monotonic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_monotonic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_monotonic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, 
%rd5}, [test_cmpxchg_monotonic_monotonic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_monotonic_acquire(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_monotonic_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_acquire_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_acquire_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_acquire_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic acquire + ret i128 %new +} + +define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_monotonic_seq_cst( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; 
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic seq_cst + ret i128 %new +} + +define i128 @test_cmpxchg_acquire_monotonic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acquire_monotonic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_monotonic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_monotonic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_monotonic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_acquire_acquire(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acquire_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_acquire_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_acquire_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_acquire_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: 
st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire acquire + ret i128 %new +} + +define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acquire_seq_cst( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire seq_cst + ret i128 %new +} + +define i128 @test_cmpxchg_release_monotonic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_release_monotonic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_monotonic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_monotonic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_monotonic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.release.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release monotonic + ret i128 %new +} + +define 
i128 @test_cmpxchg_release_acquire(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_release_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_acquire_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_acquire_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_acquire_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release acquire + ret i128 %new +} + +define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_release_seq_cst( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release seq_cst + ret i128 %new +} + +define i128 @test_cmpxchg_acq_rel_monotonic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acq_rel_monotonic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; 
CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_monotonic_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_monotonic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_monotonic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_acq_rel_acquire(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acq_rel_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_acquire_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_acquire_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_acquire_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel acquire + ret i128 %new +} + +define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_acq_rel_seq_cst( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, 
[test_cmpxchg_acq_rel_seq_cst_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel seq_cst + ret i128 %new +} + +define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_seq_cst_monotonic( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst monotonic + ret i128 %new +} + +define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_seq_cst_acquire( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2]; +; 
CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst acquire + ret i128 %new +} + +define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) { +; CHECK-LABEL: test_cmpxchg_seq_cst_seq_cst( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2]; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, swap, dst; +; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3}; +; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5}; +; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap; +; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5}; +; CHECK-NEXT: ret; + %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst seq_cst + ret i128 %new +} + +define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) { +; CHECK-LABEL: test_atomicrmw_and( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<13>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_and_param_1]; +; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_and_param_0]; +; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3]; +; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4; +; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 cmp, 
swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p1 bra $L__BB34_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw and ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw or` with monotonic ordering. The checks expect a retry
+;; loop: or.b64 on each 64-bit register of the loaded pair, then
+;; atom.relaxed.sys.cas.b128, branching back to the loop header while the
+;; value returned by the CAS differs from the value that was expected.
+define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_or(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_or_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_or_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB35_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b64 %rd6, %rd11, %rd4;
+; CHECK-NEXT:    or.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p1 bra $L__BB35_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw or ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw xor` with monotonic ordering. Same expected shape as the
+;; `or` case, with xor.b64 producing the two registers of the swap value.
+define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_xor(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_xor_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_xor_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB36_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xor.b64 %rd6, %rd11, %rd4;
+; CHECK-NEXT:    xor.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p1 bra $L__BB36_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw xor ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw min` with monotonic ordering. The swap value is picked by
+;; two selp.b64 on a predicate built from setp.lt.u64 on the first registers
+;; of each pair and setp.eq.b64 / setp.lt.s64 (signed) on the second
+;; registers; it is then installed through the cas.b128 retry loop.
+define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_min(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<7>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_min_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_min_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB37_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    setp.lt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    setp.lt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT:    or.pred %p5, %p3, %p4;
+; CHECK-NEXT:    selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT:    selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p6 bra $L__BB37_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw min ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw max` with monotonic ordering. Mirrors the `min` case with
+;; setp.gt.u64 on the first registers and setp.gt.s64 (signed) on the second.
+define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_max(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<7>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_max_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_max_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB38_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    setp.gt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    setp.gt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT:    or.pred %p5, %p3, %p4;
+; CHECK-NEXT:    selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT:    selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p6 bra $L__BB38_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw max ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw umin` with monotonic ordering. Same expected shape as
+;; `min`, except that both register comparisons are unsigned (setp.lt.u64).
+define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_umin(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<7>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umin_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_umin_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB39_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    setp.lt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    setp.lt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT:    or.pred %p5, %p3, %p4;
+; CHECK-NEXT:    selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT:    selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p6 bra $L__BB39_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw umin ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+;; i128 `atomicrmw umax` with monotonic ordering. Same expected shape as
+;; `max`, except that both register comparisons are unsigned (setp.gt.u64).
+define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_umax(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<7>;
+; CHECK-NEXT:    .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umax_param_1];
+; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_umax_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT:  $L__BB40_1: // %atomicrmw.start
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    setp.gt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    setp.gt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT:    or.pred %p5, %p3, %p4;
+; CHECK-NEXT:    selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT:    selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
+; CHECK-NEXT:    mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT:    xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT:    or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT:    mov.b64 %rd11, %rd1;
+; CHECK-NEXT:    mov.b64 %rd12, %rd2;
+; CHECK-NEXT:    @%p6 bra $L__BB40_1;
+; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    ret;
+  %ret = atomicrmw umax ptr %ptr, i128 %val monotonic
+  ret i128 %ret
+}
+
+
+;; 16-byte-aligned i128 global in addrspace(3), zero-initialized. The checks
+;; in the function below expect it to show up as a `.shared .align 16` byte
+;; array declared inside the function body ("demoted variable").
+@si128 = internal addrspace(3) global i128 0, align 16
+
+;; `atomicrmw xchg` of the constant 23 into @si128 with monotonic ordering;
+;; the result is unused. The checks expect a single
+;; atom.relaxed.sys.shared.exch.b128 whose operand is built from the
+;; register pair {23, 0}, with no retry loop.
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-NEXT:    // demoted variable
+; CHECK-NEXT:    .shared .align 16 .b8 si128[16];
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b64 %rd1, 0;
+; CHECK-NEXT:    mov.b64 %rd2, 23;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 amt, dst;
+; CHECK-NEXT:    mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT:    atom.relaxed.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT:    mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    ret;
+  %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 monotonic
+  ret void
+}  |
