diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll | 380 |
1 files changed, 181 insertions, 199 deletions
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll index 6e480996e7e6..d895c715ab3c 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60 -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %} +; RUN: %if ptxas-sm_60 && ptxas-isa-5.0 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %} define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-LABEL: monotonic_monotonic_i8_global_cta( ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -22,23 +22,22 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB0_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB0_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB0_1; ; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r12; @@ -52,7 +51,7 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -67,23 +66,22 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB1_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB1_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB1_1; ; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -98,7 +96,7 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -114,23 +112,22 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB2_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB2_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB2_1; ; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -145,7 +142,7 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -160,23 +157,22 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB3_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB3_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB3_1; ; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -191,7 +187,7 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -206,23 +202,22 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB4_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB4_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB4_1; ; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -237,7 +232,7 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -253,23 +248,22 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB5_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB5_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB5_1; ; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -284,7 +278,7 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -300,23 +294,22 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB6_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB6_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB6_1; ; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r12; @@ -330,7 +323,7 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -346,23 +339,22 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB7_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB7_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB7_1; ; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -377,7 +369,7 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -393,23 +385,22 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB8_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB8_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB8_1; ; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -424,7 +415,7 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -440,23 +431,22 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB9_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB9_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB9_1; ; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -471,7 +461,7 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -487,23 +477,22 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB10_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB10_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB10_1; ; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -518,7 +507,7 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -534,23 +523,22 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB11_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB11_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB11_1; ; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -565,7 +553,7 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -581,23 +569,22 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 % ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB12_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB12_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB12_1; ; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -612,7 +599,7 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -628,23 +615,22 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB13_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB13_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB13_1; ; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -659,7 +645,7 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -675,23 +661,22 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB14_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB14_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB14_1; ; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -1899,7 +1884,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -1915,23 +1900,22 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.global.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.global.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB60_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB60_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB60_1; ; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1997,7 +1981,7 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) { ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -2013,23 +1997,22 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB64_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB64_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB64_1; ; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; @@ -2044,7 +2027,7 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM60: { ; SM60-NEXT: .reg .pred %p<3>; ; SM60-NEXT: .reg .b16 %rs<2>; -; SM60-NEXT: .reg .b32 %r<18>; +; SM60-NEXT: .reg .b32 %r<17>; ; SM60-NEXT: .reg .b64 %rd<3>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: @@ -2060,23 +2043,22 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: shl.b32 %r11, %r10, %r1; ; SM60-NEXT: not.b32 %r2, %r11; ; SM60-NEXT: cvt.u32.u16 %r12, %rs1; -; SM60-NEXT: and.b32 %r13, %r12, 255; -; SM60-NEXT: shl.b32 %r3, %r13, %r1; +; SM60-NEXT: shl.b32 %r3, %r12, %r1; ; SM60-NEXT: shl.b32 %r4, %r7, %r1; -; SM60-NEXT: ld.shared.b32 %r14, [%rd1]; -; SM60-NEXT: and.b32 %r17, %r14, %r2; +; SM60-NEXT: ld.shared.b32 %r13, [%rd1]; +; SM60-NEXT: and.b32 %r16, %r13, %r2; ; SM60-NEXT: $L__BB65_1: // %partword.cmpxchg.loop ; SM60-NEXT: // =>This Inner Loop Header: Depth=1 -; SM60-NEXT: or.b32 %r15, %r17, %r3; -; SM60-NEXT: or.b32 %r16, %r17, %r4; -; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r16, %r15; -; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16; +; SM60-NEXT: or.b32 %r14, %r16, %r3; +; SM60-NEXT: or.b32 %r15, %r16, %r4; +; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14; +; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15; ; SM60-NEXT: @%p1 bra $L__BB65_3; ; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure ; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM60-NEXT: and.b32 %r6, %r5, %r2; -; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6; -; SM60-NEXT: mov.b32 %r17, %r6; +; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6; +; SM60-NEXT: mov.b32 %r16, %r6; ; SM60-NEXT: @%p2 bra $L__BB65_1; ; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.cta; |
