diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/cmpxchg.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/cmpxchg.ll | 212 |
1 files changed, 101 insertions, 111 deletions
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll index 997df7a8ad8b..ec37025ec4c9 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %} ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %} +; RUN: %if ptxas-sm_70 && ptxas-isa-6.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %} ; TODO: these are system scope, but are compiled to gpu scope.. ; TODO: these are seq_cst, but are compiled to relaxed.. @@ -14,7 +14,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30: { ; SM30-NEXT: .reg .pred %p<3>; ; SM30-NEXT: .reg .b16 %rs<2>; -; SM30-NEXT: .reg .b32 %r<18>; +; SM30-NEXT: .reg .b32 %r<17>; ; SM30-NEXT: .reg .b64 %rd<3>; ; SM30-EMPTY: ; SM30-NEXT: // %bb.0: @@ -29,23 +29,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: shl.b32 %r11, %r10, %r1; ; SM30-NEXT: not.b32 %r2, %r11; ; SM30-NEXT: cvt.u32.u16 %r12, %rs1; -; SM30-NEXT: and.b32 %r13, %r12, 255; -; SM30-NEXT: shl.b32 %r3, %r13, %r1; +; SM30-NEXT: shl.b32 %r3, %r12, %r1; ; SM30-NEXT: shl.b32 %r4, %r7, %r1; -; SM30-NEXT: ld.b32 %r14, [%rd1]; -; SM30-NEXT: and.b32 %r17, %r14, %r2; +; SM30-NEXT: ld.b32 %r13, [%rd1]; +; SM30-NEXT: and.b32 %r16, %r13, %r2; ; SM30-NEXT: $L__BB0_1: // %partword.cmpxchg.loop ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 -; SM30-NEXT: or.b32 %r15, %r17, %r3; -; SM30-NEXT: or.b32 %r16, %r17, %r4; -; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15; -; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM30-NEXT: or.b32 %r14, %r16, %r3; +; SM30-NEXT: or.b32 %r15, %r16, %r4; +; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14; +; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM30-NEXT: @%p1 bra $L__BB0_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM30-NEXT: and.b32 %r6, %r5, %r2; -; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM30-NEXT: mov.b32 %r17, %r6; +; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM30-NEXT: mov.b32 %r16, %r6; ; SM30-NEXT: @%p2 bra $L__BB0_1; ; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r12; @@ -55,7 +54,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; ; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<18>; +; SM70-NEXT: .reg .b32 %r<17>; ; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -70,23 +69,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: shl.b32 %r11, %r10, %r1; ; SM70-NEXT: not.b32 %r2, %r11; ; SM70-NEXT: cvt.u32.u16 %r12, %rs1; -; SM70-NEXT: and.b32 %r13, %r12, 255; -; SM70-NEXT: shl.b32 %r3, %r13, %r1; +; SM70-NEXT: shl.b32 %r3, %r12, %r1; ; SM70-NEXT: shl.b32 %r4, %r7, %r1; -; SM70-NEXT: ld.b32 %r14, [%rd1]; -; SM70-NEXT: and.b32 %r17, %r14, %r2; +; SM70-NEXT: ld.b32 %r13, [%rd1]; +; SM70-NEXT: and.b32 %r16, %r13, %r2; ; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b32 %r15, %r17, %r3; -; SM70-NEXT: or.b32 %r16, %r17, %r4; -; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15; -; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM70-NEXT: or.b32 %r14, %r16, %r3; +; SM70-NEXT: or.b32 %r15, %r16, %r4; +; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14; +; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM70-NEXT: @%p1 bra $L__BB0_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM70-NEXT: and.b32 %r6, %r5, %r2; -; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM70-NEXT: mov.b32 %r17, %r6; +; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM70-NEXT: mov.b32 %r16, %r6; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r12; @@ -140,7 +138,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30: { ; SM30-NEXT: .reg .pred %p<3>; ; SM30-NEXT: .reg .b16 %rs<2>; -; SM30-NEXT: .reg .b32 %r<18>; +; SM30-NEXT: .reg .b32 %r<17>; ; SM30-NEXT: .reg .b64 %rd<3>; ; SM30-EMPTY: ; SM30-NEXT: // %bb.0: @@ -155,23 +153,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: shl.b32 %r11, %r10, %r1; ; SM30-NEXT: not.b32 %r2, %r11; ; SM30-NEXT: cvt.u32.u16 %r12, %rs1; -; SM30-NEXT: and.b32 %r13, %r12, 255; -; SM30-NEXT: shl.b32 %r3, %r13, %r1; +; SM30-NEXT: shl.b32 %r3, %r12, %r1; ; SM30-NEXT: shl.b32 %r4, %r7, %r1; -; SM30-NEXT: ld.b32 %r14, [%rd1]; -; SM30-NEXT: and.b32 %r17, %r14, %r2; +; SM30-NEXT: ld.b32 %r13, [%rd1]; +; SM30-NEXT: and.b32 %r16, %r13, %r2; ; SM30-NEXT: $L__BB1_1: // %partword.cmpxchg.loop ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 -; SM30-NEXT: or.b32 %r15, %r17, %r3; -; SM30-NEXT: or.b32 %r16, %r17, %r4; -; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15; -; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM30-NEXT: or.b32 %r14, %r16, %r3; +; SM30-NEXT: or.b32 %r15, %r16, %r4; +; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14; +; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM30-NEXT: @%p1 bra $L__BB1_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM30-NEXT: and.b32 %r6, %r5, %r2; -; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM30-NEXT: mov.b32 %r17, %r6; +; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM30-NEXT: mov.b32 %r16, %r6; ; SM30-NEXT: @%p2 bra $L__BB1_1; ; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -182,7 +179,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; ; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<18>; +; SM70-NEXT: .reg .b32 %r<17>; ; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -197,23 +194,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: shl.b32 %r11, %r10, %r1; ; SM70-NEXT: not.b32 %r2, %r11; ; SM70-NEXT: cvt.u32.u16 %r12, %rs1; -; SM70-NEXT: and.b32 %r13, %r12, 255; -; SM70-NEXT: shl.b32 %r3, %r13, %r1; +; SM70-NEXT: shl.b32 %r3, %r12, %r1; ; SM70-NEXT: shl.b32 %r4, %r7, %r1; -; SM70-NEXT: ld.b32 %r14, [%rd1]; -; SM70-NEXT: and.b32 %r17, %r14, %r2; +; SM70-NEXT: ld.b32 %r13, [%rd1]; +; SM70-NEXT: and.b32 %r16, %r13, %r2; ; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b32 %r15, %r17, %r3; -; SM70-NEXT: or.b32 %r16, %r17, %r4; -; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15; -; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM70-NEXT: or.b32 %r14, %r16, %r3; +; SM70-NEXT: or.b32 %r15, %r16, %r4; +; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14; +; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM70-NEXT: @%p1 bra $L__BB1_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM70-NEXT: and.b32 %r6, %r5, %r2; -; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM70-NEXT: mov.b32 %r17, %r6; +; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM70-NEXT: mov.b32 %r16, %r6; ; SM70-NEXT: @%p2 bra $L__BB1_1; ; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -269,7 +265,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30: { ; SM30-NEXT: .reg .pred %p<3>; ; SM30-NEXT: .reg .b16 %rs<2>; -; SM30-NEXT: .reg .b32 %r<18>; +; SM30-NEXT: .reg .b32 %r<17>; ; SM30-NEXT: .reg .b64 %rd<3>; ; SM30-EMPTY: ; SM30-NEXT: // %bb.0: @@ -285,23 +281,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: shl.b32 %r11, %r10, %r1; ; SM30-NEXT: not.b32 %r2, %r11; ; SM30-NEXT: cvt.u32.u16 %r12, %rs1; -; SM30-NEXT: and.b32 %r13, %r12, 255; -; SM30-NEXT: shl.b32 %r3, %r13, %r1; +; SM30-NEXT: shl.b32 %r3, %r12, %r1; ; SM30-NEXT: shl.b32 %r4, %r7, %r1; -; SM30-NEXT: ld.b32 %r14, [%rd1]; -; SM30-NEXT: and.b32 %r17, %r14, %r2; +; SM30-NEXT: ld.b32 %r13, [%rd1]; +; SM30-NEXT: and.b32 %r16, %r13, %r2; ; SM30-NEXT: $L__BB2_1: // %partword.cmpxchg.loop ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 -; SM30-NEXT: or.b32 %r15, %r17, %r3; -; SM30-NEXT: or.b32 %r16, %r17, %r4; -; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15; -; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM30-NEXT: or.b32 %r14, %r16, %r3; +; SM30-NEXT: or.b32 %r15, %r16, %r4; +; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14; +; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM30-NEXT: @%p1 bra $L__BB2_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM30-NEXT: and.b32 %r6, %r5, %r2; -; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM30-NEXT: mov.b32 %r17, %r6; +; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM30-NEXT: mov.b32 %r16, %r6; ; SM30-NEXT: @%p2 bra $L__BB2_1; ; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r12; @@ -311,7 +306,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; ; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<18>; +; SM70-NEXT: .reg .b32 %r<17>; ; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -327,23 +322,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: shl.b32 %r11, %r10, %r1; ; SM70-NEXT: not.b32 %r2, %r11; ; SM70-NEXT: cvt.u32.u16 %r12, %rs1; -; SM70-NEXT: and.b32 %r13, %r12, 255; -; SM70-NEXT: shl.b32 %r3, %r13, %r1; +; SM70-NEXT: shl.b32 %r3, %r12, %r1; ; SM70-NEXT: shl.b32 %r4, %r7, %r1; -; SM70-NEXT: ld.b32 %r14, [%rd1]; -; SM70-NEXT: and.b32 %r17, %r14, %r2; +; SM70-NEXT: ld.b32 %r13, [%rd1]; +; SM70-NEXT: and.b32 %r16, %r13, %r2; ; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b32 %r15, %r17, %r3; -; SM70-NEXT: or.b32 %r16, %r17, %r4; -; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15; -; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM70-NEXT: or.b32 %r14, %r16, %r3; +; SM70-NEXT: or.b32 %r15, %r16, %r4; +; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14; +; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM70-NEXT: @%p1 bra $L__BB2_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM70-NEXT: and.b32 %r6, %r5, %r2; -; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM70-NEXT: mov.b32 %r17, %r6; +; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM70-NEXT: mov.b32 %r16, %r6; ; SM70-NEXT: @%p2 bra $L__BB2_1; ; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r12; @@ -398,7 +392,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30: { ; SM30-NEXT: .reg .pred %p<3>; ; SM30-NEXT: .reg .b16 %rs<2>; -; SM30-NEXT: .reg .b32 %r<18>; +; SM30-NEXT: .reg .b32 %r<17>; ; SM30-NEXT: .reg .b64 %rd<3>; ; SM30-EMPTY: ; SM30-NEXT: // %bb.0: @@ -414,23 +408,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: shl.b32 %r11, %r10, %r1; ; SM30-NEXT: not.b32 %r2, %r11; ; SM30-NEXT: cvt.u32.u16 %r12, %rs1; -; SM30-NEXT: and.b32 %r13, %r12, 255; -; SM30-NEXT: shl.b32 %r3, %r13, %r1; +; SM30-NEXT: shl.b32 %r3, %r12, %r1; ; SM30-NEXT: shl.b32 %r4, %r7, %r1; -; SM30-NEXT: ld.b32 %r14, [%rd1]; -; SM30-NEXT: and.b32 %r17, %r14, %r2; +; SM30-NEXT: ld.b32 %r13, [%rd1]; +; SM30-NEXT: and.b32 %r16, %r13, %r2; ; SM30-NEXT: $L__BB3_1: // %partword.cmpxchg.loop ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 -; SM30-NEXT: or.b32 %r15, %r17, %r3; -; SM30-NEXT: or.b32 %r16, %r17, %r4; -; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15; -; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM30-NEXT: or.b32 %r14, %r16, %r3; +; SM30-NEXT: or.b32 %r15, %r16, %r4; +; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14; +; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM30-NEXT: @%p1 bra $L__BB3_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM30-NEXT: and.b32 %r6, %r5, %r2; -; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM30-NEXT: mov.b32 %r17, %r6; +; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM30-NEXT: mov.b32 %r16, %r6; ; SM30-NEXT: @%p2 bra $L__BB3_1; ; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -441,7 +434,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; ; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<18>; +; SM70-NEXT: .reg .b32 %r<17>; ; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -457,23 +450,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: shl.b32 %r11, %r10, %r1; ; SM70-NEXT: not.b32 %r2, %r11; ; SM70-NEXT: cvt.u32.u16 %r12, %rs1; -; SM70-NEXT: and.b32 %r13, %r12, 255; -; SM70-NEXT: shl.b32 %r3, %r13, %r1; +; SM70-NEXT: shl.b32 %r3, %r12, %r1; ; SM70-NEXT: shl.b32 %r4, %r7, %r1; -; SM70-NEXT: ld.b32 %r14, [%rd1]; -; SM70-NEXT: and.b32 %r17, %r14, %r2; +; SM70-NEXT: ld.b32 %r13, [%rd1]; +; SM70-NEXT: and.b32 %r16, %r13, %r2; ; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b32 %r15, %r17, %r3; -; SM70-NEXT: or.b32 %r16, %r17, %r4; -; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15; -; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM70-NEXT: or.b32 %r14, %r16, %r3; +; SM70-NEXT: or.b32 %r15, %r16, %r4; +; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14; +; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM70-NEXT: @%p1 bra $L__BB3_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM70-NEXT: and.b32 %r6, %r5, %r2; -; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM70-NEXT: mov.b32 %r17, %r6; +; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM70-NEXT: mov.b32 %r16, %r6; ; SM70-NEXT: @%p2 bra $L__BB3_1; ; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -530,7 +522,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30: { ; SM30-NEXT: .reg .pred %p<3>; ; SM30-NEXT: .reg .b16 %rs<2>; -; SM30-NEXT: .reg .b32 %r<18>; +; SM30-NEXT: .reg .b32 %r<17>; ; SM30-NEXT: .reg .b64 %rd<3>; ; SM30-EMPTY: ; SM30-NEXT: // %bb.0: @@ -546,23 +538,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: shl.b32 %r11, %r10, %r1; ; SM30-NEXT: not.b32 %r2, %r11; ; SM30-NEXT: cvt.u32.u16 %r12, %rs1; -; SM30-NEXT: and.b32 %r13, %r12, 255; -; SM30-NEXT: shl.b32 %r3, %r13, %r1; +; SM30-NEXT: shl.b32 %r3, %r12, %r1; ; SM30-NEXT: shl.b32 %r4, %r7, %r1; -; SM30-NEXT: ld.b32 %r14, [%rd1]; -; SM30-NEXT: and.b32 %r17, %r14, %r2; +; SM30-NEXT: ld.b32 %r13, [%rd1]; +; SM30-NEXT: and.b32 %r16, %r13, %r2; ; SM30-NEXT: $L__BB4_1: // %partword.cmpxchg.loop ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 -; SM30-NEXT: or.b32 %r15, %r17, %r3; -; SM30-NEXT: or.b32 %r16, %r17, %r4; -; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15; -; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM30-NEXT: or.b32 %r14, %r16, %r3; +; SM30-NEXT: or.b32 %r15, %r16, %r4; +; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14; +; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM30-NEXT: @%p1 bra $L__BB4_3; ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM30-NEXT: and.b32 %r6, %r5, %r2; -; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM30-NEXT: mov.b32 %r17, %r6; +; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM30-NEXT: mov.b32 %r16, %r6; ; SM30-NEXT: @%p2 bra $L__BB4_1; ; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -573,7 +564,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; ; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<18>; +; SM70-NEXT: .reg .b32 %r<17>; ; SM70-NEXT: .reg .b64 %rd<3>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -589,23 +580,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: shl.b32 %r11, %r10, %r1; ; SM70-NEXT: not.b32 %r2, %r11; ; SM70-NEXT: cvt.u32.u16 %r12, %rs1; -; SM70-NEXT: and.b32 %r13, %r12, 255; -; SM70-NEXT: shl.b32 %r3, %r13, %r1; +; SM70-NEXT: shl.b32 %r3, %r12, %r1; ; SM70-NEXT: shl.b32 %r4, %r7, %r1; -; SM70-NEXT: ld.b32 %r14, [%rd1]; -; SM70-NEXT: and.b32 %r17, %r14, %r2; +; SM70-NEXT: ld.b32 %r13, [%rd1]; +; SM70-NEXT: and.b32 %r16, %r13, %r2; ; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 -; SM70-NEXT: or.b32 %r15, %r17, %r3; -; SM70-NEXT: or.b32 %r16, %r17, %r4; -; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15; -; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM70-NEXT: or.b32 %r14, %r16, %r3; +; SM70-NEXT: or.b32 %r15, %r16, %r4; +; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14; +; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM70-NEXT: @%p1 bra $L__BB4_3; ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM70-NEXT: and.b32 %r6, %r5, %r2; -; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM70-NEXT: mov.b32 %r17, %r6; +; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM70-NEXT: mov.b32 %r16, %r6; ; SM70-NEXT: @%p2 bra $L__BB4_1; ; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; |
