summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/NVPTX/cmpxchg.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/cmpxchg.ll')
-rw-r--r-- llvm/test/CodeGen/NVPTX/cmpxchg.ll 212
1 files changed, 101 insertions, 111 deletions
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index 997df7a8ad8b..ec37025ec4c9 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-6.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
; TODO: these are system scope, but are compiled to gpu scope..
; TODO: these are seq_cst, but are compiled to relaxed..
@@ -14,7 +14,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30: {
; SM30-NEXT: .reg .pred %p<3>;
; SM30-NEXT: .reg .b16 %rs<2>;
-; SM30-NEXT: .reg .b32 %r<18>;
+; SM30-NEXT: .reg .b32 %r<17>;
; SM30-NEXT: .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT: // %bb.0:
@@ -29,23 +29,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
; SM30-NEXT: not.b32 %r2, %r11;
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM30-NEXT: and.b32 %r13, %r12, 255;
-; SM30-NEXT: shl.b32 %r3, %r13, %r1;
+; SM30-NEXT: shl.b32 %r3, %r12, %r1;
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
-; SM30-NEXT: ld.b32 %r14, [%rd1];
-; SM30-NEXT: and.b32 %r17, %r14, %r2;
+; SM30-NEXT: ld.b32 %r13, [%rd1];
+; SM30-NEXT: and.b32 %r16, %r13, %r2;
; SM30-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
-; SM30-NEXT: or.b32 %r15, %r17, %r3;
-; SM30-NEXT: or.b32 %r16, %r17, %r4;
-; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM30-NEXT: or.b32 %r14, %r16, %r3;
+; SM30-NEXT: or.b32 %r15, %r16, %r4;
+; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM30-NEXT: @%p1 bra $L__BB0_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM30-NEXT: and.b32 %r6, %r5, %r2;
-; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM30-NEXT: mov.b32 %r17, %r6;
+; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM30-NEXT: mov.b32 %r16, %r6;
; SM30-NEXT: @%p2 bra $L__BB0_1;
; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r12;
@@ -55,7 +54,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -70,23 +69,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
; SM70-NEXT: not.b32 %r2, %r11;
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM70-NEXT: and.b32 %r13, %r12, 255;
-; SM70-NEXT: shl.b32 %r3, %r13, %r1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
-; SM70-NEXT: ld.b32 %r14, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r14, %r2;
+; SM70-NEXT: ld.b32 %r13, [%rd1];
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r15, %r17, %r3;
-; SM70-NEXT: or.b32 %r16, %r17, %r4;
-; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
+; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM70-NEXT: @%p1 bra $L__BB0_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
@@ -140,7 +138,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30: {
; SM30-NEXT: .reg .pred %p<3>;
; SM30-NEXT: .reg .b16 %rs<2>;
-; SM30-NEXT: .reg .b32 %r<18>;
+; SM30-NEXT: .reg .b32 %r<17>;
; SM30-NEXT: .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT: // %bb.0:
@@ -155,23 +153,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
; SM30-NEXT: not.b32 %r2, %r11;
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM30-NEXT: and.b32 %r13, %r12, 255;
-; SM30-NEXT: shl.b32 %r3, %r13, %r1;
+; SM30-NEXT: shl.b32 %r3, %r12, %r1;
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
-; SM30-NEXT: ld.b32 %r14, [%rd1];
-; SM30-NEXT: and.b32 %r17, %r14, %r2;
+; SM30-NEXT: ld.b32 %r13, [%rd1];
+; SM30-NEXT: and.b32 %r16, %r13, %r2;
; SM30-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
-; SM30-NEXT: or.b32 %r15, %r17, %r3;
-; SM30-NEXT: or.b32 %r16, %r17, %r4;
-; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM30-NEXT: or.b32 %r14, %r16, %r3;
+; SM30-NEXT: or.b32 %r15, %r16, %r4;
+; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM30-NEXT: @%p1 bra $L__BB1_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM30-NEXT: and.b32 %r6, %r5, %r2;
-; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM30-NEXT: mov.b32 %r17, %r6;
+; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM30-NEXT: mov.b32 %r16, %r6;
; SM30-NEXT: @%p2 bra $L__BB1_1;
; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -182,7 +179,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -197,23 +194,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
; SM70-NEXT: not.b32 %r2, %r11;
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM70-NEXT: and.b32 %r13, %r12, 255;
-; SM70-NEXT: shl.b32 %r3, %r13, %r1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
-; SM70-NEXT: ld.b32 %r14, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r14, %r2;
+; SM70-NEXT: ld.b32 %r13, [%rd1];
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r15, %r17, %r3;
-; SM70-NEXT: or.b32 %r16, %r17, %r4;
-; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
+; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM70-NEXT: @%p1 bra $L__BB1_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -269,7 +265,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30: {
; SM30-NEXT: .reg .pred %p<3>;
; SM30-NEXT: .reg .b16 %rs<2>;
-; SM30-NEXT: .reg .b32 %r<18>;
+; SM30-NEXT: .reg .b32 %r<17>;
; SM30-NEXT: .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT: // %bb.0:
@@ -285,23 +281,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
; SM30-NEXT: not.b32 %r2, %r11;
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM30-NEXT: and.b32 %r13, %r12, 255;
-; SM30-NEXT: shl.b32 %r3, %r13, %r1;
+; SM30-NEXT: shl.b32 %r3, %r12, %r1;
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
-; SM30-NEXT: ld.b32 %r14, [%rd1];
-; SM30-NEXT: and.b32 %r17, %r14, %r2;
+; SM30-NEXT: ld.b32 %r13, [%rd1];
+; SM30-NEXT: and.b32 %r16, %r13, %r2;
; SM30-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
-; SM30-NEXT: or.b32 %r15, %r17, %r3;
-; SM30-NEXT: or.b32 %r16, %r17, %r4;
-; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM30-NEXT: or.b32 %r14, %r16, %r3;
+; SM30-NEXT: or.b32 %r15, %r16, %r4;
+; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM30-NEXT: @%p1 bra $L__BB2_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM30-NEXT: and.b32 %r6, %r5, %r2;
-; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM30-NEXT: mov.b32 %r17, %r6;
+; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM30-NEXT: mov.b32 %r16, %r6;
; SM30-NEXT: @%p2 bra $L__BB2_1;
; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r12;
@@ -311,7 +306,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -327,23 +322,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
; SM70-NEXT: not.b32 %r2, %r11;
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM70-NEXT: and.b32 %r13, %r12, 255;
-; SM70-NEXT: shl.b32 %r3, %r13, %r1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
-; SM70-NEXT: ld.b32 %r14, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r14, %r2;
+; SM70-NEXT: ld.b32 %r13, [%rd1];
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r15, %r17, %r3;
-; SM70-NEXT: or.b32 %r16, %r17, %r4;
-; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
+; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM70-NEXT: @%p1 bra $L__BB2_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
; SM70-NEXT: @%p2 bra $L__BB2_1;
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
@@ -398,7 +392,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30: {
; SM30-NEXT: .reg .pred %p<3>;
; SM30-NEXT: .reg .b16 %rs<2>;
-; SM30-NEXT: .reg .b32 %r<18>;
+; SM30-NEXT: .reg .b32 %r<17>;
; SM30-NEXT: .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT: // %bb.0:
@@ -414,23 +408,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
; SM30-NEXT: not.b32 %r2, %r11;
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM30-NEXT: and.b32 %r13, %r12, 255;
-; SM30-NEXT: shl.b32 %r3, %r13, %r1;
+; SM30-NEXT: shl.b32 %r3, %r12, %r1;
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
-; SM30-NEXT: ld.b32 %r14, [%rd1];
-; SM30-NEXT: and.b32 %r17, %r14, %r2;
+; SM30-NEXT: ld.b32 %r13, [%rd1];
+; SM30-NEXT: and.b32 %r16, %r13, %r2;
; SM30-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
-; SM30-NEXT: or.b32 %r15, %r17, %r3;
-; SM30-NEXT: or.b32 %r16, %r17, %r4;
-; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM30-NEXT: or.b32 %r14, %r16, %r3;
+; SM30-NEXT: or.b32 %r15, %r16, %r4;
+; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM30-NEXT: @%p1 bra $L__BB3_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM30-NEXT: and.b32 %r6, %r5, %r2;
-; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM30-NEXT: mov.b32 %r17, %r6;
+; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM30-NEXT: mov.b32 %r16, %r6;
; SM30-NEXT: @%p2 bra $L__BB3_1;
; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -441,7 +434,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -457,23 +450,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
; SM70-NEXT: not.b32 %r2, %r11;
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM70-NEXT: and.b32 %r13, %r12, 255;
-; SM70-NEXT: shl.b32 %r3, %r13, %r1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
-; SM70-NEXT: ld.b32 %r14, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r14, %r2;
+; SM70-NEXT: ld.b32 %r13, [%rd1];
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r15, %r17, %r3;
-; SM70-NEXT: or.b32 %r16, %r17, %r4;
-; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
+; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM70-NEXT: @%p1 bra $L__BB3_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
; SM70-NEXT: @%p2 bra $L__BB3_1;
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -530,7 +522,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30: {
; SM30-NEXT: .reg .pred %p<3>;
; SM30-NEXT: .reg .b16 %rs<2>;
-; SM30-NEXT: .reg .b32 %r<18>;
+; SM30-NEXT: .reg .b32 %r<17>;
; SM30-NEXT: .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT: // %bb.0:
@@ -546,23 +538,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
; SM30-NEXT: not.b32 %r2, %r11;
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM30-NEXT: and.b32 %r13, %r12, 255;
-; SM30-NEXT: shl.b32 %r3, %r13, %r1;
+; SM30-NEXT: shl.b32 %r3, %r12, %r1;
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
-; SM30-NEXT: ld.b32 %r14, [%rd1];
-; SM30-NEXT: and.b32 %r17, %r14, %r2;
+; SM30-NEXT: ld.b32 %r13, [%rd1];
+; SM30-NEXT: and.b32 %r16, %r13, %r2;
; SM30-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
-; SM30-NEXT: or.b32 %r15, %r17, %r3;
-; SM30-NEXT: or.b32 %r16, %r17, %r4;
-; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM30-NEXT: or.b32 %r14, %r16, %r3;
+; SM30-NEXT: or.b32 %r15, %r16, %r4;
+; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM30-NEXT: @%p1 bra $L__BB4_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM30-NEXT: and.b32 %r6, %r5, %r2;
-; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM30-NEXT: mov.b32 %r17, %r6;
+; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM30-NEXT: mov.b32 %r16, %r6;
; SM30-NEXT: @%p2 bra $L__BB4_1;
; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -573,7 +564,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -589,23 +580,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
; SM70-NEXT: not.b32 %r2, %r11;
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
-; SM70-NEXT: and.b32 %r13, %r12, 255;
-; SM70-NEXT: shl.b32 %r3, %r13, %r1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
-; SM70-NEXT: ld.b32 %r14, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r14, %r2;
+; SM70-NEXT: ld.b32 %r13, [%rd1];
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r15, %r17, %r3;
-; SM70-NEXT: or.b32 %r16, %r17, %r4;
-; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
-; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
+; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
+; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
; SM70-NEXT: @%p1 bra $L__BB4_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
; SM70-NEXT: @%p2 bra $L__BB4_1;
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;