summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/NVPTX/atomics-b128.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/atomics-b128.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/atomics-b128.ll1033
1 files changed, 1033 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
new file mode 100644
index 000000000000..b2a3f94d11a1
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -0,0 +1,1033 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: not llc < %s -mcpu=sm_90 -mattr=+ptx82 2>&1 | FileCheck %s --check-prefix=ERROR
+; RUN: not llc < %s -mcpu=sm_80 -mattr=+ptx84 2>&1 | FileCheck %s --check-prefix=ERROR
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx84 | FileCheck %s --check-prefix=CHECK
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %}
+
+;; TODO: Update cmpxchg.py so that it can automatically generate the IR for
+;; these test cases.
+
+target triple = "nvptx64-nvidia-cuda"
+
+;; Both "not llc" invocations above (sm_90 with PTX 8.2, sm_80 with PTX 8.4) must fail; only the first two diagnostics are matched.
+; ERROR: error: unsupported cmpxchg
+; ERROR: error: unsupported cmpxchg
+
+define i128 @test_xchg_generic(ptr %addr, i128 %amt) { ; Release xchg of an i128 through a plain ptr (no addrspace), default scope.
+; CHECK-LABEL: test_xchg_generic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_generic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_generic_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt release ; no syncscope given; the expected PTX uses .sys and no state-space qualifier
+ ret i128 %old
+}
+
+define i128 @test_xchg_global(ptr addrspace(1) %addr, i128 %amt) { ; Release xchg of an i128 through ptr addrspace(1), default scope.
+; CHECK-LABEL: test_xchg_global(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_global_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_global_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.global.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr addrspace(1) %addr, i128 %amt release ; addrspace(1) shows up as the .global qualifier in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_shared(ptr addrspace(3) %addr, i128 %amt) { ; Release xchg of an i128 through ptr addrspace(3), default scope.
+; CHECK-LABEL: test_xchg_shared(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.shared.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr addrspace(3) %addr, i128 %amt release ; addrspace(3) shows up as the .shared qualifier in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_shared_cluster(ptr addrspace(7) %addr, i128 %amt) { ; Release xchg of an i128 through ptr addrspace(7), default scope.
+; CHECK-LABEL: test_xchg_shared_cluster(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_cluster_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_cluster_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.shared::cluster.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr addrspace(7) %addr, i128 %amt release ; addrspace(7) shows up as the .shared::cluster qualifier in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_block(ptr %addr, i128 %amt) { ; Release xchg of an i128 with syncscope("block") on a plain ptr.
+; CHECK-LABEL: test_xchg_block(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_block_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_block_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.cta.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("block") release ; syncscope("block") shows up as the .cta scope in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_cluster(ptr %addr, i128 %amt) { ; Release xchg of an i128 with syncscope("cluster") on a plain ptr.
+; CHECK-LABEL: test_xchg_cluster(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_cluster_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_cluster_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.cluster.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("cluster") release ; syncscope("cluster") shows up as the .cluster scope in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_gpu(ptr %addr, i128 %amt) { ; Release xchg of an i128 with syncscope("device") on a plain ptr.
+; CHECK-LABEL: test_xchg_gpu(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_gpu_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_gpu_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.gpu.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("device") release ; syncscope("device") shows up as the .gpu scope in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_sys(ptr %addr, i128 %amt) { ; Release xchg of an i128 with no syncscope; the expected atom uses the .sys scope.
+; CHECK-LABEL: test_xchg_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_sys_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_sys_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt release ; NOTE(review): same instruction as in test_xchg_generic and test_xchg_release
+ ret i128 %old
+}
+
+define i128 @test_xchg_relaxed(ptr %addr, i128 %amt) { ; Monotonic xchg of an i128 on a plain ptr, default scope.
+; CHECK-LABEL: test_xchg_relaxed(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_relaxed_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_relaxed_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.relaxed.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt monotonic ; monotonic ordering shows up as .relaxed in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_acquire(ptr %addr, i128 %amt) { ; Acquire xchg of an i128 on a plain ptr, default scope.
+; CHECK-LABEL: test_xchg_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acquire_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acquire_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.acquire.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt acquire ; acquire ordering shows up as .acquire in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_release(ptr %addr, i128 %amt) { ; Release xchg of an i128 on a plain ptr, default scope.
+; CHECK-LABEL: test_xchg_release(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_release_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_release_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt release ; release ordering shows up as .release in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_xchg_acq_rel(ptr %addr, i128 %amt) { ; Acq_rel xchg of an i128 on a plain ptr, default scope.
+; CHECK-LABEL: test_xchg_acq_rel(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acq_rel_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acq_rel_param_1];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
+; CHECK-NEXT: atom.acq_rel.sys.exch.b128 dst, [%rd1], amt;
+; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %old = atomicrmw xchg ptr %addr, i128 %amt acq_rel ; acq_rel ordering shows up as .acq_rel in the expected atom
+ ret i128 %old
+}
+
+define i128 @test_cmpxchg_generic(ptr %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg through a plain ptr (no addrspace), default scope.
+; CHECK-LABEL: test_cmpxchg_generic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_generic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_generic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_generic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic ; the expected cas has .relaxed.sys and no state-space qualifier
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_global(ptr addrspace(1) %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg through ptr addrspace(1), default scope.
+; CHECK-LABEL: test_cmpxchg_global(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_global_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_global_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_global_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.sys.global.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr addrspace(1) %addr, i128 %cmp, i128 %new monotonic monotonic ; addrspace(1) shows up as the .global qualifier in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_shared(ptr addrspace(3) %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg through ptr addrspace(3), default scope.
+; CHECK-LABEL: test_cmpxchg_shared(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.sys.shared.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr addrspace(3) %addr, i128 %cmp, i128 %new monotonic monotonic ; addrspace(3) shows up as the .shared qualifier in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_block(ptr %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg with syncscope("block") on a plain ptr.
+; CHECK-LABEL: test_cmpxchg_block(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_block_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_block_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_block_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.cta.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("block") monotonic monotonic ; syncscope("block") shows up as the .cta scope in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_cluster(ptr %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg with syncscope("cluster") on a plain ptr.
+; CHECK-LABEL: test_cmpxchg_cluster(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_cluster_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_cluster_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_cluster_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.cluster.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("cluster") monotonic monotonic ; syncscope("cluster") shows up as the .cluster scope in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_gpu(ptr %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg with syncscope("device") on a plain ptr.
+; CHECK-LABEL: test_cmpxchg_gpu(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_gpu_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_gpu_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_gpu_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.gpu.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("device") monotonic monotonic ; syncscope("device") shows up as the .gpu scope in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_shared_cluster(ptr addrspace(7) %addr, i128 %cmp, i128 %new) { ; Monotonic/monotonic i128 cmpxchg through ptr addrspace(7), default scope.
+; CHECK-LABEL: test_cmpxchg_shared_cluster(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_cluster_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_cluster_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_cluster_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.sys.shared::cluster.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr addrspace(7) %addr, i128 %cmp, i128 %new monotonic monotonic ; addrspace(7) shows up as the .shared::cluster qualifier in the expected cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_monotonic_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=monotonic, failure=monotonic, default scope.
+; CHECK-LABEL: test_cmpxchg_monotonic_monotonic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_monotonic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_monotonic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_monotonic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic ; the expected cas is .relaxed
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_monotonic_acquire(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=monotonic, failure=acquire, default scope.
+; CHECK-LABEL: test_cmpxchg_monotonic_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_acquire_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_acquire_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_acquire_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic acquire ; the expected cas is .acquire
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=monotonic, failure=seq_cst, default scope.
+; CHECK-LABEL: test_cmpxchg_monotonic_seq_cst(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic seq_cst ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acquire_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acquire, failure=monotonic, default scope.
+; CHECK-LABEL: test_cmpxchg_acquire_monotonic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_monotonic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_monotonic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_monotonic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire monotonic ; the expected cas is .acquire
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acquire_acquire(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acquire, failure=acquire, default scope.
+; CHECK-LABEL: test_cmpxchg_acquire_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_acquire_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_acquire_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_acquire_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire acquire ; the expected cas is .acquire
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acquire, failure=seq_cst, default scope.
+; CHECK-LABEL: test_cmpxchg_acquire_seq_cst(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire seq_cst ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_release_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=release, failure=monotonic, default scope.
+; CHECK-LABEL: test_cmpxchg_release_monotonic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_monotonic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_monotonic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_monotonic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.release.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release monotonic ; the expected cas is .release
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_release_acquire(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=release, failure=acquire, default scope.
+; CHECK-LABEL: test_cmpxchg_release_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_acquire_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_acquire_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_acquire_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release acquire ; release success plus acquire failure yields an .acq_rel cas in the expected PTX
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=release, failure=seq_cst, default scope.
+; CHECK-LABEL: test_cmpxchg_release_seq_cst(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release seq_cst ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acq_rel_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acq_rel, failure=monotonic, default scope.
+; CHECK-LABEL: test_cmpxchg_acq_rel_monotonic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_monotonic_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_monotonic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_monotonic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel monotonic ; the expected cas is .acq_rel
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acq_rel_acquire(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acq_rel, failure=acquire, default scope.
+; CHECK-LABEL: test_cmpxchg_acq_rel_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_acquire_param_0];
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_acquire_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_acquire_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel acquire ; the expected cas is .acq_rel
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=acq_rel, failure=seq_cst, default scope.
+; CHECK-LABEL: test_cmpxchg_acq_rel_seq_cst(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel seq_cst ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=seq_cst, failure=monotonic, default scope.
+; CHECK-LABEL: test_cmpxchg_seq_cst_monotonic(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst monotonic ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=seq_cst, failure=acquire, default scope.
+; CHECK-LABEL: test_cmpxchg_seq_cst_acquire(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst acquire ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; i128 cmpxchg, success=seq_cst, failure=seq_cst, default scope.
+; CHECK-LABEL: test_cmpxchg_seq_cst_seq_cst(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
+; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
+; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
+; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
+; CHECK-NEXT: ret;
+ %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst seq_cst ; the expected PTX has fence.sc.sys ahead of an .acquire cas
+ ret i128 %new ; returns the %new argument; the cmpxchg result %pairold is unused
+}
+
+define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) { ; Monotonic i128 atomicrmw and; the expected PTX is a retry loop around atom.relaxed.sys.cas.b128.
+; CHECK-LABEL: test_atomicrmw_and(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_and_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_and_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
+; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p1 bra $L__BB34_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw and ptr %ptr, i128 %val monotonic ; expected PTX: and.b64 on each 64-bit half, then cas; branches back while the cas result differs from the compared value
+ ret i128 %ret
+}
+
+define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) { ; Monotonic i128 atomicrmw or; the expected PTX is a retry loop around atom.relaxed.sys.cas.b128.
+; CHECK-LABEL: test_atomicrmw_or(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_or_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_or_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
+; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p1 bra $L__BB35_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw or ptr %ptr, i128 %val monotonic ; expected PTX: or.b64 on each 64-bit half, then cas; branches back while the cas result differs from the compared value
+ ret i128 %ret
+}
+
+;; Monotonic i128 `atomicrmw xor` through a plain `ptr` (no addrspace
+;; qualifier).
+;; The expected PTX is a compare-and-swap retry loop: the current value is
+;; loaded with ld.v2.b64, each 64-bit half is XORed with the matching half of
+;; %val to form the new value, and atom.relaxed.sys.cas.b128 is retried while
+;; the value it returns differs from the value it was compared against.
+;; Note that xor.b64 appears twice in the loop for different reasons: first to
+;; compute the new value, then (after the CAS) to detect a mismatch.
+define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_xor(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_xor_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_xor_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
+; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p1 bra $L__BB36_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw xor ptr %ptr, i128 %val monotonic
+ ret i128 %ret
+}
+
+;; Monotonic signed i128 `atomicrmw min` through a plain `ptr` (no addrspace
+;; qualifier).
+;; The expected PTX is an atom.relaxed.sys.cas.b128 retry loop whose new value
+;; is picked with selp. The 128-bit "loaded < %val" predicate is assembled
+;; from the 64-bit halves: a signed compare (setp.lt.s64) on the second
+;; register of each pair, or, when those second registers are equal, an
+;; unsigned compare (setp.lt.u64) on the first register. When the predicate
+;; holds the loaded value is written back unchanged; otherwise %val is
+;; written. The signed compare on the second half is what distinguishes this
+;; from the umin test further down in this file.
+define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_min(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<7>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_min_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_min_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: and.pred %p3, %p2, %p1;
+; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p6 bra $L__BB37_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw min ptr %ptr, i128 %val monotonic
+ ret i128 %ret
+}
+
+;; Monotonic signed i128 `atomicrmw max` through a plain `ptr` (no addrspace
+;; qualifier).
+;; The expected PTX is an atom.relaxed.sys.cas.b128 retry loop whose new value
+;; is picked with selp. The 128-bit "loaded > %val" predicate is assembled
+;; from the 64-bit halves: a signed compare (setp.gt.s64) on the second
+;; register of each pair, or, when those second registers are equal, an
+;; unsigned compare (setp.gt.u64) on the first register. When the predicate
+;; holds the loaded value is written back unchanged; otherwise %val is
+;; written.
+define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_max(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<7>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_max_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_max_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: and.pred %p3, %p2, %p1;
+; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p6 bra $L__BB38_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw max ptr %ptr, i128 %val monotonic
+ ret i128 %ret
+}
+
+;; Monotonic unsigned i128 `atomicrmw umin` through a plain `ptr` (no
+;; addrspace qualifier).
+;; The expected PTX is an atom.relaxed.sys.cas.b128 retry loop whose new value
+;; is picked with selp. The 128-bit "loaded < %val" predicate is assembled
+;; from the 64-bit halves with unsigned compares only: setp.lt.u64 on the
+;; second register of each pair, or, when those second registers are equal,
+;; setp.lt.u64 on the first register. When the predicate holds the loaded
+;; value is written back unchanged; otherwise %val is written.
+define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_umin(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<7>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umin_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umin_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: and.pred %p3, %p2, %p1;
+; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p6 bra $L__BB39_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw umin ptr %ptr, i128 %val monotonic
+ ret i128 %ret
+}
+
+;; Monotonic unsigned i128 `atomicrmw umax` through a plain `ptr` (no
+;; addrspace qualifier).
+;; The expected PTX is an atom.relaxed.sys.cas.b128 retry loop whose new value
+;; is picked with selp. The 128-bit "loaded > %val" predicate is assembled
+;; from the 64-bit halves with unsigned compares only: setp.gt.u64 on the
+;; second register of each pair, or, when those second registers are equal,
+;; setp.gt.u64 on the first register. When the predicate holds the loaded
+;; value is written back unchanged; otherwise %val is written.
+define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
+; CHECK-LABEL: test_atomicrmw_umax(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<7>;
+; CHECK-NEXT: .reg .b64 %rd<13>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umax_param_1];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umax_param_0];
+; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
+; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: and.pred %p3, %p2, %p1;
+; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 cmp, swap, dst;
+; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
+; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
+; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
+; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
+; CHECK-NEXT: mov.b64 %rd11, %rd1;
+; CHECK-NEXT: mov.b64 %rd12, %rd2;
+; CHECK-NEXT: @%p6 bra $L__BB40_1;
+; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: ret;
+ %ret = atomicrmw umax ptr %ptr, i128 %val monotonic
+ ret i128 %ret
+}
+
+
+;; Zero-initialized, 16-byte-aligned i128 with internal linkage in
+;; addrspace(3); it is the exchange target of test_atomicrmw_xchg_const.
+@si128 = internal addrspace(3) global i128 0, align 16
+
+;; Monotonic i128 `atomicrmw xchg` of the constant 23 into the addrspace(3)
+;; global @si128; the returned old value is unused and the function returns
+;; void.
+;; Expected PTX: @si128 is declared inside the function body as a "demoted
+;; variable" (.shared .align 16 .b8 si128[16]), the constant is materialized
+;; as the register pair {23, 0}, and a single
+;; atom.relaxed.sys.shared.exch.b128 addresses [si128] directly. No
+;; compare-and-swap loop is expected for xchg.
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: // demoted variable
+; CHECK-NEXT: .shared .align 16 .b8 si128[16];
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b64 %rd1, 0;
+; CHECK-NEXT: mov.b64 %rd2, 23;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT: atom.relaxed.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: ret;
+ %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 monotonic
+ ret void
+}