summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll1123
1 files changed, 1123 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll
new file mode 100644
index 000000000000..4c62409a85c0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/a-v-ds-atomicrmw.ll
@@ -0,0 +1,1123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
+
+;---------------------------------------------------------------------
+; xchg i32 cases
+;---------------------------------------------------------------------
+
+; Data is produced in an AGPR ("=a") and the returned value is consumed in an AGPR ("a").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_a_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced in an AGPR ("=a"); the returned value is consumed in a VGPR ("v").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_a_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Data is produced in a VGPR ("=v"); the returned value is consumed in an AGPR ("a").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_v_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_v_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA") and the returned value is consumed as AV ("^VA").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_av_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA"); the returned value is consumed in a VGPR ("v").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_av_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA"); the returned value is consumed in an AGPR ("a").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_av_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced in an AGPR ("=a"); the returned value is consumed with the AV constraint ("^VA").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_a_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_a_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Data is produced in a VGPR ("=v"); the returned value is consumed with the AV constraint ("^VA").
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_v_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_v_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; AV data ("=^VA") and AV result ("^VA") while two 32-wide VGPR tuples (v[0:31] and v[32:63])
+; are defined before and used after the atomic. The expected output keeps the AV values in
+; a1 / a0 and copies them through v1 / v0 around the DS instruction.
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_ret_av_av_no_agprs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[0:31]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_write_b32 a18, v31 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a31, v18 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a30, v19 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a29, v20 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a28, v21 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a27, v22 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a26, v23 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a25, v24 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a24, v25 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a23, v26 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a22, v27 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a21, v28 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a20, v29 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a19, v30 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; CHECK-NEXT: v_accvgpr_read_b32 v18, a31 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v19, a30 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v20, a29 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v21, a28 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v22, a27 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v23, a26 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v24, a25 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v25, a24 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v26, a23 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v27, a22 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v28, a21 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v29, a20 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v30, a19 ; Reload Reuse
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_accvgpr_read_b32 v31, a18 ; Reload Reuse
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:31]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
+ %vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
+ %vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Data is produced in an AGPR ("=a"); the value returned by the atomic is left unused.
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_noret_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_noret_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %unused = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA"); the value returned by the atomic is left unused.
+; The exchange targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xchg_i32_noret_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i32_noret_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %unused = atomicrmw xchg ptr addrspace(3) %gep.0, i32 %data seq_cst
+ ret void
+}
+
+;---------------------------------------------------------------------
+; xchg i64 cases
+;---------------------------------------------------------------------
+
+; 64-bit data is produced in an AGPR pair ("=a") and the returned value is consumed in an AGPR pair ("a").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_a_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_a_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced in an AGPR pair ("=a"); the returned value is consumed in a VGPR pair ("v").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_a_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_a_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "v"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced in a VGPR pair ("=v"); the returned value is consumed in an AGPR pair ("a").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_v_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_v_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced with the AV constraint ("=^VA") and the returned value is consumed as AV ("^VA").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_av_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_av_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced with the AV constraint ("=^VA"); the returned value is consumed in a VGPR pair ("v").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_av_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_av_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "v"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced with the AV constraint ("=^VA"); the returned value is consumed in an AGPR pair ("a").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_av_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_av_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced in an AGPR pair ("=a"); the returned value is consumed with the AV constraint ("^VA").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_a_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_a_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced in a VGPR pair ("=v"); the returned value is consumed with the AV constraint ("^VA").
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_ret_v_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_ret_v_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=v"()
+ %result = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; 64-bit data is produced in an AGPR pair ("=a"); the value returned by the atomic is left unused.
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_noret_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_noret_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %unused = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ ret void
+}
+
+; 64-bit data is produced with the AV constraint ("=^VA"); the value returned by the atomic is left unused.
+; The exchange targets element 10 of the i64 array (%gep.0), hence the expected offset:80.
+define void @ds_atomic_xchg_i64_noret_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xchg_i64_noret_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_wrxchg_rtn_b64 v[0:1], v0, v[2:3] offset:80
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %unused = atomicrmw xchg ptr addrspace(3) %gep.0, i64 %data seq_cst
+ ret void
+}
+
+;---------------------------------------------------------------------
+; xor i32 cases
+;---------------------------------------------------------------------
+
+; Data is produced in an AGPR ("=a") and the returned value is consumed in an AGPR ("a").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_a_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_a_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced in an AGPR ("=a"); the returned value is consumed in a VGPR ("v").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_a_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_a_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Data is produced in a VGPR ("=v"); the returned value is consumed in an AGPR ("a").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_v_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_v_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA") and the returned value is consumed as AV ("^VA").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_av_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_av_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA"); the returned value is consumed in a VGPR ("v").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_av_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_av_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "v"(i32 %result)
+ ret void
+}
+
+; Data is produced with the AV constraint ("=^VA"); the returned value is consumed in an AGPR ("a").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_av_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_av_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "a"(i32 %result)
+ ret void
+}
+
+; Data is produced in an AGPR ("=a"); the returned value is consumed with the AV constraint ("^VA").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_a_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_a_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Data is produced in a VGPR ("=v"); the returned value is consumed with the AV constraint ("^VA").
+; The xor targets element 10 of the array (%gep.0), hence the expected offset:40.
+define void @ds_atomic_xor_i32_ret_v_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_v_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1 offset:40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=v"()
+ %result = atomicrmw xor ptr addrspace(3) %gep.0, i32 %data seq_cst
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; AV input and AV result while inline asm pins v[0:31] and v[32:63] across the
+; atomic. Only operand $0 appears in the asm text, so the expected output
+; prints just v[0:31] for the wide def/use. The expected output preserves
+; v40-v47 and v56-v63 in a2-a17, spills v0-v17 to the stack at s32, parks
+; v18-v31 in a18-a31, keeps the pointer in a0 across the wide def, and moves
+; the xor data/result through a1/a0 with v_accvgpr copies around
+; ds_xor_rtn_b32.
+; NOTE(review): the _no_agprs suffix does not match the expected output,
+; which uses a0-a31 - confirm the intended name.
+define void @ds_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_ret_av_av_no_agprs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[0:31]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_write_b32 a18, v31 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
+; CHECK-NEXT: ds_xor_rtn_b32 v0, v0, v1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a31, v18 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a30, v19 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a29, v20 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a28, v21 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a27, v22 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a26, v23 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a25, v24 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a24, v25 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a23, v26 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a22, v27 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a21, v28 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a20, v29 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a19, v30 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; CHECK-NEXT: v_accvgpr_read_b32 v18, a31 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v19, a30 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v20, a29 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v21, a28 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v22, a27 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v23, a26 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v24, a25 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v25, a24 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v26, a23 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v27, a22 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v28, a21 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v29, a20 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v30, a19 ; Reload Reuse
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_accvgpr_read_b32 v31, a18 ; Reload Reuse
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:31]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+; Define both fixed 32-register tuples so they are live across the atomic.
+ %vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
+ %vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
+ %vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i32 %data seq_cst
+; The wide use is marked sideeffect while the result use after it is not; the
+; expected output prints "; use a0" ahead of "; use v[0:31]".
+ call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
+ call void asm "; use $0", "^VA"(i32 %result)
+ ret void
+}
+
+; Non-returning i32 xor with the data operand defined by inline asm
+; constrained to "=a". The atomic result is never used, and the expected
+; output has ds_xor_b32 take a0 directly with no v_accvgpr_read copy.
+define void @ds_atomic_xor_i32_noret_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_noret_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_b32 v0, a0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=a"()
+ %unused = atomicrmw xor ptr addrspace(3) %ptr, i32 %data seq_cst
+ ret void
+}
+
+; Non-returning i32 xor with the data operand defined by inline asm
+; constrained to "=^VA". The atomic result is never used, and the expected
+; output places the data in a VGPR (v1) consumed directly by ds_xor_b32.
+define void @ds_atomic_xor_i32_noret_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i32_noret_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_b32 v0, v1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i32 asm "; def $0", "=^VA"()
+ %unused = atomicrmw xor ptr addrspace(3) %ptr, i32 %data seq_cst
+ ret void
+}
+
+;---------------------------------------------------------------------
+; xor i64 cases
+;---------------------------------------------------------------------
+
+; Input and result use AGPR
+; Returning i64 xor with both the data def and the result use constrained to
+; "a". The expected output copies a[0:1] into v[2:3] before ds_xor_rtn_b64
+; and copies the v[0:1] result back into a[0:1] afterwards.
+define void @ds_atomic_xor_i64_ret_a_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_a_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; Input is AGPR, result used as VGPR.
+; Returning i64 xor with the data def constrained to "a" and the result use
+; constrained to "v". The expected output copies a[0:1] into v[2:3] before
+; ds_xor_rtn_b64 and uses the v[0:1] result without further copies.
+define void @ds_atomic_xor_i64_ret_a_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_a_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "v"(i64 %result)
+ ret void
+}
+
+; Input is VGPR, result used as AGPR
+; Returning i64 xor with the data def constrained to "v" and the result use
+; constrained to "a". The expected output feeds v[2:3] straight into
+; ds_xor_rtn_b64 and copies the v[0:1] result into a[0:1] for the use.
+define void @ds_atomic_xor_i64_ret_v_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_v_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=v"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; Input is AV, result also used as AV
+; Returning i64 xor with both the data def and the result use constrained to
+; "^VA". The expected output keeps everything in VGPRs (data in v[2:3],
+; result in v[0:1]) with no v_accvgpr copies.
+define void @ds_atomic_xor_i64_ret_av_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_av_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; Input is AV, result used as v
+; Returning i64 xor with the data def constrained to "^VA" and the result use
+; constrained to "v". The expected output keeps the data in v[2:3] and the
+; result in v[0:1] with no v_accvgpr copies.
+define void @ds_atomic_xor_i64_ret_av_v(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_av_v:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "v"(i64 %result)
+ ret void
+}
+
+; Input is AV, result used as a
+; Returning i64 xor with the data def constrained to "^VA" and the result use
+; constrained to "a". The expected output keeps the data in v[2:3] and copies
+; the v[0:1] result into a[0:1] for the use.
+define void @ds_atomic_xor_i64_ret_av_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_av_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "a"(i64 %result)
+ ret void
+}
+
+; Input is a, result used as AV
+; Returning i64 xor with the data def constrained to "a" and the result use
+; constrained to "^VA". The expected output copies a[0:1] into v[2:3] before
+; ds_xor_rtn_b64 and uses the v[0:1] result without further copies.
+define void @ds_atomic_xor_i64_ret_a_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_a_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v3, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=a"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; Input is v, result used as AV
+; Returning i64 xor with the data def constrained to "v" and the result use
+; constrained to "^VA". The expected output keeps the data in v[2:3] and the
+; result in v[0:1] with no v_accvgpr copies.
+define void @ds_atomic_xor_i64_ret_v_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_ret_v_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i32 0, i32 10
+ %data = call i64 asm "; def $0", "=v"()
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ call void asm "; use $0", "^VA"(i64 %result)
+ ret void
+}
+
+; Non-returning i64 xor with the data operand defined by inline asm
+; constrained to "=a". The atomic result is never used, and the expected
+; output has ds_xor_b64 take a[0:1] directly with no v_accvgpr_read copies.
+define void @ds_atomic_xor_i64_noret_a(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_noret_a:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_b64 v0, a[0:1]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+; Its indices are typed i64 here, unlike the i32 indices in the returning
+; i64 tests.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i64 0, i64 10
+ %data = call i64 asm "; def $0", "=a"()
+ %unused = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ ret void
+}
+
+; Non-returning i64 xor with the data operand defined by inline asm
+; constrained to "=^VA". The atomic result is never used, and the expected
+; output places the data in VGPRs (v[2:3]) consumed directly by ds_xor_b64.
+define void @ds_atomic_xor_i64_noret_av(ptr addrspace(3) %ptr) #0 {
+; CHECK-LABEL: ds_atomic_xor_i64_noret_av:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v[2:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ds_xor_b64 v0, v[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; NOTE(review): %gep.0 is never referenced; the atomicrmw operates on %ptr.
+; Its indices are typed i64 here, unlike the i32 indices in the returning
+; i64 tests.
+ %gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(3) %ptr, i64 0, i64 10
+ %data = call i64 asm "; def $0", "=^VA"()
+ %unused = atomicrmw xor ptr addrspace(3) %ptr, i64 %data seq_cst
+ ret void
+}
+
+; Attribute group #0, referenced by the test functions in this file: nounwind
+; plus an "amdgpu-waves-per-eu" range of 10,10.
+; NOTE(review): presumably the waves-per-eu setting is what constrains the
+; register budget for the register-pressure test - confirm.
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }