summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll380
1 files changed, 181 insertions, 199 deletions
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index 6e480996e7e6..d895c715ab3c 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-isa-5.0 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-LABEL: monotonic_monotonic_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -22,23 +22,22 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB0_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB0_1;
; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r12;
@@ -52,7 +51,7 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -67,23 +66,22 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB1_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB1_1;
; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -98,7 +96,7 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -114,23 +112,22 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB2_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB2_1;
; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -145,7 +142,7 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -160,23 +157,22 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB3_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB3_1;
; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -191,7 +187,7 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -206,23 +202,22 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB4_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB4_1;
; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -237,7 +232,7 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -253,23 +248,22 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB5_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB5_1;
; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -284,7 +278,7 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -300,23 +294,22 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB6_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB6_1;
; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r12;
@@ -330,7 +323,7 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -346,23 +339,22 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB7_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB7_1;
; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -377,7 +369,7 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -393,23 +385,22 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB8_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB8_1;
; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -424,7 +415,7 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -440,23 +431,22 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB9_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB9_1;
; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -471,7 +461,7 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -487,23 +477,22 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB10_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB10_1;
; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -518,7 +507,7 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -534,23 +523,22 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB11_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB11_1;
; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -565,7 +553,7 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -581,23 +569,22 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB12_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB12_1;
; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -612,7 +599,7 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -628,23 +615,22 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB13_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB13_1;
; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -659,7 +645,7 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -675,23 +661,22 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB14_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB14_1;
; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -1899,7 +1884,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -1915,23 +1900,22 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.global.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.global.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB60_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB60_1;
; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1997,7 +1981,7 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -2013,23 +1997,22 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB64_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB64_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB64_1;
; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
@@ -2044,7 +2027,7 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
; SM60-NEXT: .reg .b16 %rs<2>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
@@ -2060,23 +2043,22 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
; SM60-NEXT: not.b32 %r2, %r11;
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM60-NEXT: and.b32 %r13, %r12, 255;
-; SM60-NEXT: shl.b32 %r3, %r13, %r1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
-; SM60-NEXT: ld.shared.b32 %r14, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r14, %r2;
+; SM60-NEXT: ld.shared.b32 %r13, [%rd1];
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
; SM60-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r15, %r17, %r3;
-; SM60-NEXT: or.b32 %r16, %r17, %r4;
-; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
+; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM60-NEXT: @%p1 bra $L__BB65_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
; SM60-NEXT: @%p2 bra $L__BB65_1;
; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;