diff options
| author | Aiden Grossman <aidengrossman@google.com> | 2025-11-06 08:06:41 +0000 |
|---|---|---|
| committer | Aiden Grossman <aidengrossman@google.com> | 2025-11-06 08:06:41 +0000 |
| commit | 0b8d9fc2999f630c32ddeb8c8376bf24a4106156 (patch) | |
| tree | a5e9e2efa16d001e7f60b13559081e050fb85be4 | |
| parent | 1b232e544b601ebe6a07c38bb081ec06d9ffa15c (diff) | |
| parent | 9f5811ec6bd5e9f99dd22c4a06e6e984cb15ae4b (diff) | |
[𝘀𝗽𝗿] changes introduced through rebaseusers/boomanaiden154/main.compiler-rtsanitizers-mark-three-tests-as-unsupported-on-android
Created using spr 1.3.7
[skip ci]
39 files changed, 1305 insertions, 2442 deletions
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index d3cca82b4bdf..40fc66ea12e3 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -454,7 +454,7 @@ LANGOPT(BranchTargetEnforcement, 1, 0, NotCompatible, "Branch-target enforcement LANGOPT(BranchProtectionPAuthLR, 1, 0, NotCompatible, "Use PC as a diversifier using PAuthLR NOP instructions.") LANGOPT(GuardedControlStack, 1, 0, NotCompatible, "Guarded control stack enabled") -LANGOPT(SpeculativeLoadHardening, 1, 0, NotCompatible, "Speculative load hardening enabled") +LANGOPT(SpeculativeLoadHardening, 1, 0, Benign, "Speculative load hardening enabled") LANGOPT(RelativeCXXABIVTables, 1, 0, NotCompatible, "Use an ABI-incompatible v-table layout that uses relative references") diff --git a/clang/test/ClangScanDeps/strip-codegen-args.m b/clang/test/ClangScanDeps/strip-codegen-args.m index 71171f498338..f2cec6281f7d 100644 --- a/clang/test/ClangScanDeps/strip-codegen-args.m +++ b/clang/test/ClangScanDeps/strip-codegen-args.m @@ -16,6 +16,7 @@ // CHECK-NOT: "-flto" // CHECK-NOT: "-fno-autolink" // CHECK-NOT: "-mrelax-relocations=no" +// CHECK-NOT: "-mspeculative-load-hardening" // CHECK: ] // CHECK: "name": "A" // CHECK: } @@ -39,6 +40,11 @@ "command": "clang -Imodules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -O2 -flto=full -fsyntax-only DIR/t3.m", "file": "DIR/t2.m" } + { + "directory": "DIR", + "command": "clang -Imodules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -O2 -mspeculative-load-hardening -fsyntax-only DIR/t3.m", + "file": "DIR/t3.m" + } ] //--- modules/A/module.modulemap diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp index 6312e61f5e62..4c0d26642863 100644 --- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp @@ -1122,13 +1122,7 @@ CUDAIntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType, void CUDAIntrinsicLibrary::genSyncWarp( llvm::ArrayRef<fir::ExtendedValue> args) { assert(args.size() == 1); - constexpr llvm::StringLiteral funcName = "llvm.nvvm.bar.warp.sync"; - mlir::Value mask = fir::getBase(args[0]); - mlir::FunctionType funcType = - mlir::FunctionType::get(builder.getContext(), {mask.getType()}, {}); - auto funcOp = builder.createFunction(loc, funcName, funcType); - llvm::SmallVector<mlir::Value> argsList{mask}; - fir::CallOp::create(builder, loc, funcOp, argsList); + mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0])); } // THIS_GRID diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 2d2c801b48f4..9f8f74a0c7b5 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -105,7 +105,7 @@ end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} ! CHECK: nvvm.barrier0 -! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> () +! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32 ! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> () ! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> () ! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> () @@ -219,7 +219,7 @@ end ! CHECK-LABEL: func.func @_QPhost1() ! CHECK: cuf.kernel ! CHECK: nvvm.barrier0 -! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> () +! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32 ! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32 ! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32 ! CHECK: fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32 diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index ca83af9824b8..2ac69c38ebff 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -103,7 +103,6 @@ steps: queue: libcxx-builders os: aix <<: *common - skip: "https://github.com/llvm/llvm-project/issues/162516" - label: AIX (64-bit) command: libcxx/utils/ci/run-buildbot aix @@ -115,7 +114,6 @@ steps: queue: libcxx-builders os: aix <<: *common - skip: "https://github.com/llvm/llvm-project/issues/162516" - group: ':freebsd: FreeBSD' steps: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h index 7cc78d4be279..fc41641fd5cf 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h @@ -211,6 +211,21 @@ public: return FilteredView(Libraries.begin(), Libraries.end(), S, K); } + using LibraryFilterFn = std::function<bool(const LibraryInfo &)>; + void getLibraries(LibState S, PathType K, + std::vector<std::shared_ptr<LibraryInfo>> &Outs, + LibraryFilterFn Filter = nullptr) const { + std::shared_lock<std::shared_mutex> Lock(Mtx); + for (const auto &[_, Entry] : Libraries) { + const auto &Info = *Entry; + if (Info.getKind() != K || Info.getState() != S) + continue; + if (Filter && !Filter(Info)) + continue; + Outs.push_back(Entry); + } + } + void forEachLibrary(const LibraryVisitor &visitor) const { std::unique_lock<std::shared_mutex> Lock(Mtx); for (const auto &[_, entry] : Libraries) { @@ -220,14 +235,14 @@ public: } bool isLoaded(StringRef Path) const { - std::unique_lock<std::shared_mutex> Lock(Mtx); + std::shared_lock<std::shared_mutex> Lock(Mtx); if (auto It = Libraries.find(Path.str()); It != Libraries.end()) return It->second->getState() == LibState::Loaded; return false; } bool isQueried(StringRef Path) const { - std::unique_lock<std::shared_mutex> Lock(Mtx); + std::shared_lock<std::shared_mutex> Lock(Mtx); if (auto It = Libraries.find(Path.str()); It != Libraries.end()) return It->second->getState() == LibState::Queried; return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 46c4bb85a742..816b7ba92bd8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4046,6 +4046,8 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) { m_ConstInt(AndMask)))) { // Type Legalisation Pattern: // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff) + if (BitWidthDiff.getZExtValue() >= BitWidth) + return SDValue(); unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue(); if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth)) return SDValue(); diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp index 35da82a10306..7e1d5285463c 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp @@ -184,9 +184,9 @@ class SymbolSearchContext { public: SymbolSearchContext(SymbolQuery &Q) : Q(Q) {} - bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); } + bool hasSearched(const LibraryInfo *Lib) const { return Searched.count(Lib); } - void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); } + void markSearched(const LibraryInfo *Lib) { Searched.insert(Lib); } inline bool allResolved() const { return Q.allResolved(); } @@ -194,7 +194,7 @@ public: private: SymbolQuery &Q; - DenseSet<LibraryInfo *> Searched; + DenseSet<const LibraryInfo *> Searched; }; void LibraryResolver::resolveSymbolsInLibrary( @@ -226,19 +226,18 @@ void LibraryResolver::resolveSymbolsInLibrary( return EnumerateResult::Continue; }, Opts); + }; + if (!Lib.hasFilter()) { + LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath() + << "\n";); + enumerateSymbolsIfNeeded(); if (DiscoveredSymbols.empty()) { LLVM_DEBUG(dbgs() << " No symbols and remove library : " << Lib.getFullPath() << "\n";); LibMgr.removeLibrary(Lib.getFullPath()); return; } - }; - - if (!Lib.hasFilter()) { - LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath() - << "\n";); - enumerateSymbolsIfNeeded(); SmallVector<StringRef> SymbolVec; SymbolVec.reserve(DiscoveredSymbols.size()); for (const auto &KV : DiscoveredSymbols) @@ -288,11 +287,15 @@ void LibraryResolver::searchSymbolsInLibraries( SymbolSearchContext Ctx(Q); while (!Ctx.allResolved()) { + std::vector<std::shared_ptr<LibraryInfo>> Libs; + LibMgr.getLibraries(S, K, Libs, [&](const LibraryInfo &Lib) { + return !Ctx.hasSearched(&Lib); + }); - for (auto &Lib : LibMgr.getView(S, K)) { - if (Ctx.hasSearched(Lib.get())) - continue; + if (Libs.empty() && !scanLibrariesIfNeeded(K, scanBatchSize)) + break; // no more new libs to scan + for (auto &Lib : Libs) { // can use Async here? resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options); Ctx.markSearched(Lib.get()); @@ -300,12 +303,6 @@ void LibraryResolver::searchSymbolsInLibraries( if (Ctx.allResolved()) return; } - - if (Ctx.allResolved()) - return; - - if (!scanLibrariesIfNeeded(K, scanBatchSize)) - break; // no more new libs to scan } }; diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp index d93f68622fcc..32f6dbefb848 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp @@ -50,7 +50,7 @@ void handleError(Error Err, StringRef context = "") { } bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) { - Triple HostTriple(sys::getDefaultTargetTriple()); + Triple HostTriple(sys::getProcessTriple()); Triple ObjTriple = Obj.makeTriple(); LLVM_DEBUG({ diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index e3b0a1bec53e..e62fdb678684 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -312,7 +312,7 @@ public: } bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const { - if (!ST->hasSVE()) + if (!ST->isSVEorStreamingSVEAvailable()) return false; // For fixed vectors, avoid scalarization if using SVE for them. diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 4fe194c813c4..54d94b1f8682 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2366,18 +2366,6 @@ def isGFX8GFX9NotGFX90A : " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; -// Pre-90A GFX9s allow the NV bit in FLAT instructions. -def isNVAllowedInFlat : - Predicate<"!Subtarget->hasGFX90AInsts() &&" - " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, - AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>; - -// GFX8 or GFX90A+ do not allow the NV bit in FLAT instructions. -def isNVNotAllowedInFlat : - Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||" - " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">, - AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>; - def isGFX90AOnly : Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2808c44c59c1..09338c533fdf 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1602,11 +1602,6 @@ public: bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } - bool isFlatInstAndNVAllowed(const MCInst &Inst) const { - uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A(); - } - AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<AMDGPUTargetStreamer &>(TS); @@ -5375,7 +5370,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]); Error(S, "scale_offset is not supported on this GPU"); } - if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) { + if (CPol & CPol::NV) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]); @@ -7150,13 +7145,6 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { unsigned Enabled = 0, Seen = 0; for (;;) { SMLoc S = getLoc(); - - if (isGFX9() && trySkipId("nv")) { - Enabled |= CPol::NV; - Seen |= CPol::NV; - continue; - } - bool Disabling; unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); if (!CPol) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 6ef224148e44..8ea64d17417f 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : bits<7> saddr; bits<10> vdst; - bits<6> cpol; + bits<5> cpol; // Only valid on gfx9 bits<1> lds = ps.lds; // LDS DMA for global and scratch @@ -2693,52 +2693,29 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); } -class FLAT_Real_vi_ex_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : - FLAT_Real_vi <op, ps, has_sccb> { - let AssemblerPredicate = isNVNotAllowedInFlat; -} - -class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : - FLAT_Real_vi <op, ps, has_sccb> { - let AssemblerPredicate = isNVAllowedInFlat; - let Subtarget = SIEncodingFamily.GFX9; - let DecoderNamespace = "GFX9"; - let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit. -} - -multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> { - def _vi: FLAT_Real_vi_ex_gfx9<op, ps, has_sccb>; - def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>; -} - multiclass FLAT_Real_AllAddr_vi<bits<7> op, bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { - defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; - defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; -} - -multiclass FLAT_Real_AllAddr_vi_ex_gfx9<bits<7> op, - bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { - def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; - def _SADDR_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; + def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; + def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; } class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> : FLAT_Real <op, ps>, SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> { let AssemblerPredicate = isGFX940Plus; - let DecoderNamespace = "GFX940"; + let DecoderNamespace = "GFX9"; let Inst{13} = ps.sve; let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); } multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> { - let OtherPredicates = [isGFX8GFX9NotGFX940] in { - defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>; + def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> { + let AssemblerPredicate = isGFX8GFX9NotGFX940; + let OtherPredicates = [isGFX8GFX9NotGFX940]; + } + def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> { + let DecoderNamespace = "GFX9"; } - - defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; - let AssemblerPredicate = isGFX940Plus in { def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; @@ -2751,11 +2728,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op, bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { let OtherPredicates = [isGFX8GFX9NotGFX940] in { - let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in { - defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>; + def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> { + let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds"; } - let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in { - defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; + def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> { + let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds"; } } @@ -2771,66 +2748,47 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> { def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; } -defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>; -defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>; -defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>; -defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>; -defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>; -defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>; -defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>; -defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>; - -defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>; -defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>; -defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>; -defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; -defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>; -defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>; -defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>; -defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>; - -defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>; -defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; -defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>; -defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; -defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>; -defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; +def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; +def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; +def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; +def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; +def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; +def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; +def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; +def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; + +def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; +def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; +def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; +def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; +def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; +def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; +def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; +def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; + +def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; +def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; +def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; +def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; +def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; +def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; multiclass FLAT_Real_Atomics_vi <bits<7> op, bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { defvar ps = !cast<FLAT_Pseudo>(NAME); - defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; - defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; -} - -multiclass FLAT_Real_Atomics_vi_ex_gfx9 <bits<7> op, - bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { - defvar ps = !cast<FLAT_Pseudo>(NAME); - def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; - def _RTN_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; - - def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; + def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; + def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; } multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : FLAT_Real_AllAddr_vi<op, has_sccb> { - defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; - defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; - - def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>; - def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>; -} - -multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9<bits<7> op, - bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : - FLAT_Real_AllAddr_vi_ex_gfx9<op, has_sccb> { - def _RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; - def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; + def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; + def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>; - def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>; + def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>; } defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>; @@ -2992,10 +2950,10 @@ let AssemblerPredicate = isGFX940Plus in { defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; - defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>; - defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>; - defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>; - defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>; + defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; + defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; + defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; + defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; } // End AssemblerPredicate = isGFX940Plus //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 3e6f35dbf5e5..703ec0a4befa 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -186,12 +186,8 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); - if (Imm & ~CPol::ALL_pregfx12) { - if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI)) - O << " nv"; - else - O << " /* unexpected cache policy bit */"; - } + if (Imm & ~CPol::ALL_pregfx12) + O << " /* unexpected cache policy bit */"; } void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope, diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6616b3041059..84984a0871da 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1129,40 +1129,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm( if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return false; - MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) { appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold); return true; } - // TODO: Verify the following code handles subregisters correctly. - // TODO: Handle extract of global reference - if (UseOp.getSubReg()) - return false; - - if (!OpToFold.isReg()) - return false; - - Register UseReg = OpToFold.getReg(); - if (!UseReg.isVirtual()) - return false; - - // Maybe it is just a COPY of an immediate itself. - - // FIXME: Remove this handling. There is already special case folding of - // immediate into copy in foldOperand. This is looking for the def of the - // value the folding started from in the first place. - MachineInstr *Def = MRI->getVRegDef(UseReg); - if (Def && TII->isFoldableCopy(*Def)) { - MachineOperand &DefOp = Def->getOperand(1); - if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) { - FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC, - OpToFold.DefSubReg); - appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm); - return true; - } - } - return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index c89212dae72d..90a4723c9a3e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return ArrayRef(TargetFlags); } +bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, + Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const { + enum MemIOffsetType { + Imm14Shift2, + Imm12, + Imm11Shift1, + Imm10Shift2, + Imm9Shift3, + Imm8, + Imm8Shift1, + Imm8Shift2, + Imm8Shift3 + }; + + MemIOffsetType OT; + switch (MemI.getOpcode()) { + default: + return false; + case LoongArch::LDPTR_W: + case LoongArch::LDPTR_D: + case LoongArch::STPTR_W: + case LoongArch::STPTR_D: + OT = Imm14Shift2; + break; + case LoongArch::LD_B: + case LoongArch::LD_H: + case LoongArch::LD_W: + case LoongArch::LD_D: + case LoongArch::LD_BU: + case LoongArch::LD_HU: + case LoongArch::LD_WU: + case LoongArch::ST_B: + case LoongArch::ST_H: + case LoongArch::ST_W: + case LoongArch::ST_D: + case LoongArch::FLD_S: + case LoongArch::FLD_D: + case LoongArch::FST_S: + case LoongArch::FST_D: + case LoongArch::VLD: + case LoongArch::VST: + case LoongArch::XVLD: + case LoongArch::XVST: + case LoongArch::VLDREPL_B: + case LoongArch::XVLDREPL_B: + OT = Imm12; + break; + case LoongArch::VLDREPL_H: + case LoongArch::XVLDREPL_H: + OT = Imm11Shift1; + break; + case LoongArch::VLDREPL_W: + case LoongArch::XVLDREPL_W: + OT = Imm10Shift2; + break; + case LoongArch::VLDREPL_D: + case LoongArch::XVLDREPL_D: + OT = Imm9Shift3; + break; + case LoongArch::VSTELM_B: + case LoongArch::XVSTELM_B: + OT = Imm8; + break; + case LoongArch::VSTELM_H: + case LoongArch::XVSTELM_H: + OT = Imm8Shift1; + break; + case LoongArch::VSTELM_W: + case LoongArch::XVSTELM_W: + OT = Imm8Shift2; + break; + case LoongArch::VSTELM_D: + case LoongArch::XVSTELM_D: + OT = Imm8Shift3; + break; + } + + if (MemI.getOperand(0).getReg() == Reg) + return false; + + if ((AddrI.getOpcode() != LoongArch::ADDI_W && + AddrI.getOpcode() != LoongArch::ADDI_D) || + !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm()) + return false; + + int64_t OldOffset = MemI.getOperand(2).getImm(); + int64_t Disp = AddrI.getOperand(2).getImm(); + int64_t NewOffset = OldOffset + Disp; + if (!STI.is64Bit()) + NewOffset = SignExtend64<32>(NewOffset); + + if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) && + !(OT == Imm12 && isInt<12>(NewOffset)) && + !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) && + !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) && + !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) && + !(OT == Imm8 && isInt<8>(NewOffset)) && + !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) && + !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) && + !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset))) + return false; + + AM.BaseReg = AddrI.getOperand(1).getReg(); + AM.ScaledReg = 0; + AM.Scale = 0; + AM.Displacement = NewOffset; + AM.Form = ExtAddrMode::Formula::Basic; + return true; +} + +MachineInstr * +LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const { + const DebugLoc &DL = MemI.getDebugLoc(); + MachineBasicBlock &MBB = *MemI.getParent(); + + assert(AM.ScaledReg == 0 && AM.Scale == 0 && + "Addressing mode not supported for folding"); + + unsigned MemIOp = MemI.getOpcode(); + switch (MemIOp) { + default: + return BuildMI(MBB, MemI, DL, get(MemIOp)) + .addReg(MemI.getOperand(0).getReg(), + MemI.mayLoad() ? RegState::Define : 0) + .addReg(AM.BaseReg) + .addImm(AM.Displacement) + .setMemRefs(MemI.memoperands()) + .setMIFlags(MemI.getFlags()); + case LoongArch::VSTELM_B: + case LoongArch::VSTELM_H: + case LoongArch::VSTELM_W: + case LoongArch::VSTELM_D: + case LoongArch::XVSTELM_B: + case LoongArch::XVSTELM_H: + case LoongArch::XVSTELM_W: + case LoongArch::XVSTELM_D: + return BuildMI(MBB, MemI, DL, get(MemIOp)) + .addReg(MemI.getOperand(0).getReg(), 0) + .addReg(AM.BaseReg) + .addImm(AM.Displacement) + .addImm(MemI.getOperand(3).getImm()) + .setMemRefs(MemI.memoperands()) + .setMIFlags(MemI.getFlags()); + } +} + // Returns true if this is the sext.w pattern, addi.w rd, rs, 0. bool LoongArch::isSEXT_W(const MachineInstr &MI) { return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() && diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index f25958a32bec..f69a558bdeca 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -93,6 +93,12 @@ public: ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const override; + MachineInstr *emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const override; + protected: const LoongArchSubtarget &STI; }; diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 9de4c9d83792..92a9388e5cb7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -62,6 +62,11 @@ static cl::opt<bool> cl::desc("Enable the merge base offset pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> + EnableSinkFold("loongarch-enable-sink-fold", + cl::desc("Enable sinking and folding of instruction copies"), + cl::init(true), cl::Hidden); + static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { return RM.value_or(Reloc::Static); } @@ -146,7 +151,9 @@ namespace { class LoongArchPassConfig : public TargetPassConfig { public: LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + setEnableSinkAndFold(EnableSinkFold); + } LoongArchTargetMachine &getLoongArchTargetMachine() const { return getTM<LoongArchTargetMachine>(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c3f100e3197b..995ae75da1c3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, } static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX, - unsigned ShY) { + unsigned ShY, bool AddX) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue X = N->getOperand(0); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getTargetConstant(ShY, DL, VT), X); return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getTargetConstant(ShX, DL, VT), Mul359); + DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359); } static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt) { + // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X)) switch (MulAmt) { case 5 * 3: - return getShlAddShlAdd(N, DAG, 2, 1); + return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false); case 9 * 3: - return getShlAddShlAdd(N, DAG, 3, 1); + return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false); case 5 * 5: - return getShlAddShlAdd(N, DAG, 2, 2); + return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false); case 9 * 5: - return getShlAddShlAdd(N, DAG, 3, 2); + return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false); case 9 * 9: - return getShlAddShlAdd(N, DAG, 3, 3); + return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false); default: - return SDValue(); + break; } + + // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X) + int ShX; + if (int ShY = isShifted359(MulAmt - 1, ShX)) { + assert(ShX != 0 && "MulAmt=4,6,10 handled before"); + if (ShX <= 3) + return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true); + } + return SDValue(); } // Try to expand a scalar multiply to a faster sequence. @@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, DAG.getConstant(Shift, DL, VT)); } - // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) - if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt)) - return V; + // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples + // of 25 which happen to be quite common. + // (2/4/8 * 3/5/9 + 1) * 2^N + Shift = llvm::countr_zero(MulAmt); + if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) { + if (Shift == 0) + return V; + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT)); + } // If this is a power 2 + 2/4/8, we can use a shift followed by a single // shXadd. First check if this a sum of two power of 2s because that's // easy. Then count how many zeros are up to the first bit. - if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { - unsigned ScaleShift = llvm::countr_zero(MulAmt); - if (ScaleShift >= 1 && ScaleShift < 4) { - unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); - SDLoc DL(N); - SDValue Shift1 = - DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getTargetConstant(ScaleShift, DL, VT), Shift1); - } + if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) { + unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1))); + SDLoc DL(N); + SDValue Shift1 = + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); + return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, + DAG.getTargetConstant(Shift, DL, VT), Shift1); } - // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) - // This is the two instruction form, there are also three instruction - // variants we could implement. e.g. - // (2^(1,2,3) * 3,5,9 + 1) << C2 - // 2^(C1>3) * 3,5,9 +/- 1 - if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) { - assert(Shift != 0 && "MulAmt=4,6,10 handled before"); - if (Shift <= 3) { - SDLoc DL(N); - SDValue Mul359 = - DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, - DAG.getTargetConstant(ShXAmount, DL, VT), X); - return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359, - DAG.getTargetConstant(Shift, DL, VT), X); - } - } + // TODO: 2^(C1>3) * 3,5,9 +/- 1 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { @@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359); } } - - // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples - // of 25 which happen to be quite common. - Shift = llvm::countr_zero(MulAmt); - if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) { - SDLoc DL(N); - return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT)); - } } if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt)) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index 18c462ffd0ff..dd2cffd7bd16 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -77,17 +77,53 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10CHECK-NEXT: s_endpgm ; -; GFX11CHECK-LABEL: sgpr_isnan_f16: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_clause 0x1 -; GFX11CHECK-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16: +; GFX11SELDAG-TRUE16: ; %bb.0: +; GFX11SELDAG-TRUE16-NEXT: s_clause 0x1 +; GFX11SELDAG-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11SELDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11SELDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0 +; GFX11SELDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11SELDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11SELDAG-TRUE16-NEXT: s_endpgm +; +; GFX11SELDAG-FAKE16-LABEL: sgpr_isnan_f16: +; GFX11SELDAG-FAKE16: ; %bb.0: +; GFX11SELDAG-FAKE16-NEXT: s_clause 0x1 +; GFX11SELDAG-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11SELDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11SELDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX11SELDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-FAKE16-NEXT: s_endpgm +; +; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16: +; GFX11GLISEL-TRUE16: ; %bb.0: +; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1 +; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0 +; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-TRUE16-NEXT: s_endpgm +; +; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16: +; GFX11GLISEL-FAKE16: ; %bb.0: +; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1 +; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11GLISEL-FAKE16-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 @@ -212,8 +248,9 @@ define i1 @snan_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: snan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 1 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 1 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: snan_f16: @@ -226,8 +263,9 @@ define i1 @snan_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: snan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 1 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 1 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: snan_f16: @@ -285,8 +323,9 @@ define i1 @qnan_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: qnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 2 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 2 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: qnan_f16: @@ -299,8 +338,9 @@ define i1 @qnan_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: qnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 2 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 2 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: qnan_f16: @@ -358,8 +398,9 @@ define i1 @posinf_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: posinf_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x200 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x200 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: posinf_f16: @@ -372,8 +413,9 @@ define i1 @posinf_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: posinf_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x200 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x200 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: posinf_f16: @@ -429,8 +471,9 @@ define i1 @neginf_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: neginf_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 4 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 4 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: neginf_f16: @@ -443,8 +486,9 @@ define i1 @neginf_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: neginf_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 4 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 4 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: neginf_f16: @@ -514,8 +558,9 @@ define i1 @posnormal_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: posnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x100 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x100 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: posnormal_f16: @@ -528,8 +573,9 @@ define i1 @posnormal_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: posnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x100 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x100 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: posnormal_f16: @@ -597,8 +643,9 @@ define i1 @negnormal_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: negnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 8 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 8 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: negnormal_f16: @@ -611,8 +658,9 @@ define i1 @negnormal_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: negnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 8 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 8 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: negnormal_f16: @@ -673,8 +721,9 @@ define i1 @possubnormal_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: possubnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x80 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x80 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: possubnormal_f16: @@ -687,8 +736,9 @@ define i1 @possubnormal_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: possubnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x80 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x80 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: possubnormal_f16: @@ -755,8 +805,9 @@ define i1 @negsubnormal_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: negsubnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 16 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 16 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: negsubnormal_f16: @@ -769,8 +820,9 @@ define i1 @negsubnormal_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: negsubnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 16 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 16 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: negsubnormal_f16: @@ -824,8 +876,9 @@ define i1 @poszero_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: poszero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 64 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 64 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: poszero_f16: @@ -838,8 +891,9 @@ define i1 @poszero_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: poszero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 64 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 64 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: poszero_f16: @@ -895,8 +949,9 @@ define i1 @negzero_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: negzero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 32 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 32 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: negzero_f16: @@ -909,8 +964,9 @@ define i1 @negzero_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: negzero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 32 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 32 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: negzero_f16: @@ -968,8 +1024,9 @@ define i1 @posfinite_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: posfinite_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1c0 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1c0 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: posfinite_f16: @@ -982,8 +1039,9 @@ define i1 @posfinite_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: posfinite_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1c0 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1c0 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: posfinite_f16: @@ -1047,8 +1105,9 @@ define i1 @negfinite_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: negfinite_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 56 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 56 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: negfinite_f16: @@ -1061,8 +1120,9 @@ define i1 @negfinite_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: negfinite_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 56 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 56 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: negfinite_f16: @@ -1120,8 +1180,9 @@ define i1 @isnan_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: isnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 3 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isnan_f16: @@ -1134,8 +1195,9 @@ define i1 @isnan_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnan_f16: @@ -1195,8 +1257,9 @@ define i1 @not_isnan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_isnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3fc -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3fc +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_isnan_f16: @@ -1209,8 +1272,9 @@ define i1 @not_isnan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_isnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3fc -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3fc +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_isnan_f16: @@ -1336,11 +1400,13 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isnan_v2f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3 +; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l ; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnan_v2f16: @@ -1499,13 +1565,17 @@ define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isnan_v3f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 3 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v4, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v2.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v4 +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.l, v5.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, v3 +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnan_v3f16: @@ -1693,16 +1763,20 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isnan_v4f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.l, 3 +; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 3 +; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v4, 3 :: v_dual_mov_b32 v5, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v2.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l +; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.l, v6.l ; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v4 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.h, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.h, v7.l ; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, v5 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnan_v4f16: @@ -1771,8 +1845,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind { ; GFX11SELDAG-TRUE16-LABEL: isnan_f16_strictfp: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 3 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isnan_f16_strictfp: @@ -1785,8 +1860,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind { ; GFX11GLISEL-TRUE16-LABEL: isnan_f16_strictfp: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnan_f16_strictfp: @@ -1846,8 +1922,9 @@ define i1 @isinf_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: isinf_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isinf_f16: @@ -1860,8 +1937,9 @@ define i1 @isinf_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isinf_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isinf_f16: @@ -1921,8 +1999,9 @@ define i1 @isfinite_f16(half %x) nounwind { ; GFX11SELDAG-TRUE16-LABEL: isfinite_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isfinite_f16: @@ -1935,8 +2014,9 @@ define i1 @isfinite_f16(half %x) nounwind { ; GFX11GLISEL-TRUE16-LABEL: isfinite_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isfinite_f16: @@ -1994,8 +2074,9 @@ define i1 @issubnormal_or_zero_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: issubnormal_or_zero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0xf0 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0xf0 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: issubnormal_or_zero_f16: @@ -2008,8 +2089,9 @@ define i1 @issubnormal_or_zero_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: issubnormal_or_zero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0xf0 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0xf0 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: issubnormal_or_zero_f16: @@ -2074,8 +2156,9 @@ define i1 @not_issubnormal_or_zero_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_or_zero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x30f -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x30f +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_or_zero_f16: @@ -2088,8 +2171,9 @@ define i1 @not_issubnormal_or_zero_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_or_zero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x30f -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x30f +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_or_zero_f16: @@ -2153,8 +2237,9 @@ define i1 @isnormal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: isnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x108 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isnormal_f16: @@ -2167,8 +2252,9 @@ define i1 @isnormal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: isnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x108 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x108 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnormal_f16: @@ -2236,8 +2322,9 @@ define i1 @not_isnormal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_isnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2f7 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2f7 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_isnormal_f16: @@ -2250,8 +2337,9 @@ define i1 @not_isnormal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_isnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2f7 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2f7 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_isnormal_f16: @@ -2330,8 +2418,9 @@ define i1 @not_is_plus_normal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_is_plus_normal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2ff -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2ff +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_is_plus_normal_f16: @@ -2344,8 +2433,9 @@ define i1 @not_is_plus_normal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_is_plus_normal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2ff -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2ff +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_is_plus_normal_f16: @@ -2424,8 +2514,9 @@ define i1 @not_is_neg_normal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_is_neg_normal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3f7 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3f7 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_is_neg_normal_f16: @@ -2438,8 +2529,9 @@ define i1 @not_is_neg_normal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_is_neg_normal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3f7 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3f7 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_is_neg_normal_f16: @@ -2501,8 +2593,9 @@ define i1 @issubnormal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: issubnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x90 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: issubnormal_f16: @@ -2515,8 +2608,9 @@ define i1 @issubnormal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: issubnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x90 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x90 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: issubnormal_f16: @@ -2586,8 +2680,9 @@ define i1 @not_issubnormal_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x36f -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x36f +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_f16: @@ -2600,8 +2695,9 @@ define i1 @not_issubnormal_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x36f -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x36f +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_f16: @@ -2659,8 +2755,9 @@ define i1 @iszero_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: iszero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x60 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_f16: @@ -2673,8 +2770,9 @@ define i1 @iszero_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: iszero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x60 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x60 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_f16: @@ -2745,8 +2843,9 @@ define i1 @not_iszero_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39f -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39f +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_f16: @@ -2759,8 +2858,9 @@ define i1 @not_iszero_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39f -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39f +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_f16: @@ -2818,8 +2918,9 @@ define i1 @ispositive_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: ispositive_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c0 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c0 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: ispositive_f16: @@ -2832,8 +2933,9 @@ define i1 @ispositive_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: ispositive_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c0 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c0 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: ispositive_f16: @@ -2907,8 +3009,9 @@ define i1 @not_ispositive_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_ispositive_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 63 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 63 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_ispositive_f16: @@ -2921,8 +3024,9 @@ define i1 @not_ispositive_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_ispositive_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 63 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 63 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_ispositive_f16: @@ -2992,8 +3096,9 @@ define i1 @isnegative_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: isnegative_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 60 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 60 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isnegative_f16: @@ -3006,8 +3111,9 @@ define i1 @isnegative_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: isnegative_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 60 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 60 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isnegative_f16: @@ -3074,8 +3180,9 @@ define i1 @not_isnegative_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_isnegative_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c3 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c3 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_isnegative_f16: @@ -3088,8 +3195,9 @@ define i1 @not_isnegative_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_isnegative_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c3 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c3 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_isnegative_f16: @@ -3152,8 +3260,9 @@ define i1 @iszero_or_nan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f16: @@ -3166,8 +3275,9 @@ define i1 @iszero_or_nan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f16: @@ -3231,8 +3341,9 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 { ; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_daz: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_daz: @@ -3245,8 +3356,9 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 { ; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_daz: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_daz: @@ -3310,8 +3422,9 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_maybe_daz: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_maybe_daz: @@ -3324,8 +3437,9 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_maybe_daz: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_maybe_daz: @@ -3398,8 +3512,9 @@ define i1 @not_iszero_or_nan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f16: @@ -3412,8 +3527,9 @@ define i1 @not_iszero_or_nan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f16: @@ -3486,8 +3602,9 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_daz: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_daz: @@ -3500,8 +3617,9 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_daz: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_daz: @@ -3574,8 +3692,9 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz: @@ -3588,8 +3707,9 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz: @@ -3653,8 +3773,9 @@ define i1 @iszero_or_qnan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: iszero_or_qnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x62 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x62 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_or_qnan_f16: @@ -3667,8 +3788,9 @@ define i1 @iszero_or_qnan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: iszero_or_qnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x62 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x62 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_or_qnan_f16: @@ -3737,8 +3859,9 @@ define i1 @iszero_or_snan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: iszero_or_snan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x61 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x61 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: iszero_or_snan_f16: @@ -3751,8 +3874,9 @@ define i1 @iszero_or_snan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: iszero_or_snan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x61 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x61 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: iszero_or_snan_f16: @@ -3841,8 +3965,9 @@ define i1 @not_iszero_or_qnan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_qnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39d -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39d +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_qnan_f16: @@ -3855,8 +3980,9 @@ define i1 @not_iszero_or_qnan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_qnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39d -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39d +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_qnan_f16: @@ -3942,8 +4068,9 @@ define i1 @not_iszero_or_snan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_snan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39e -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39e +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_snan_f16: @@ -3956,8 +4083,9 @@ define i1 @not_iszero_or_snan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_snan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39e -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39e +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_snan_f16: @@ -4018,8 +4146,9 @@ define i1 @isinf_or_nan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: isinf_or_nan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x207 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x207 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isinf_or_nan_f16: @@ -4032,8 +4161,9 @@ define i1 @isinf_or_nan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: isinf_or_nan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x207 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x207 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isinf_or_nan_f16: @@ -4094,8 +4224,9 @@ define i1 @not_isinf_or_nan_f16(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_isinf_or_nan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_isinf_or_nan_f16: @@ -4108,8 +4239,9 @@ define i1 @not_isinf_or_nan_f16(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_isinf_or_nan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_isinf_or_nan_f16: @@ -4170,8 +4302,9 @@ define i1 @isfinite_or_nan_f(half %x) { ; GFX11SELDAG-TRUE16-LABEL: isfinite_or_nan_f: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1fb -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1fb +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: isfinite_or_nan_f: @@ -4184,8 +4317,9 @@ define i1 @isfinite_or_nan_f(half %x) { ; GFX11GLISEL-TRUE16-LABEL: isfinite_or_nan_f: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1fb -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1fb +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: isfinite_or_nan_f: @@ -4246,8 +4380,9 @@ define i1 @not_isfinite_or_nan_f(half %x) { ; GFX11SELDAG-TRUE16-LABEL: not_isfinite_or_nan_f: ; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry ; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204 -; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11SELDAG-FAKE16-LABEL: not_isfinite_or_nan_f: @@ -4260,8 +4395,9 @@ define i1 @not_isfinite_or_nan_f(half %x) { ; GFX11GLISEL-TRUE16-LABEL: not_isfinite_or_nan_f: ; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204 -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204 +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l +; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11GLISEL-FAKE16-LABEL: not_isfinite_or_nan_f: diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir index 9484417e63c9..6706de13bb89 100644 --- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir +++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir @@ -48,7 +48,9 @@ body: | ; CHECK-LABEL: name: sgpr_lo16 ; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, 30, 0, 0, implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 30 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_16 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_t16_e64_]] %0:sreg_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF @@ -232,3 +234,34 @@ body: | $vgpr0 = COPY %3 S_ENDPGM 0, implicit $vgpr0 ... + +# Make sure the immediate materialized by the v_mov_b16 isn't +# incorrectly folded into the bfi as 0. + +# FIXME: %4:vgpr_32 = COPY %3 is a direct copy from v16 to v32 and +# should probably fail the verifier +--- +name: mov_v16_copy_v32_fold_b32_regression +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: mov_v16_copy_v32_fold_b32_regression + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B16_t16_e64_]] + ; CHECK-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 32767, [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_BFI_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = COPY $vgpr1 + %1:vgpr_32 = COPY $vgpr0 + %3:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec + %4:vgpr_32 = COPY %3 + %5:vgpr_32 = V_BFI_B32_e64 32767, %4, %1, implicit $exec + $vgpr0 = COPY %5 + SI_RETURN implicit $vgpr0 +... diff --git a/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll new file mode 100644 index 000000000000..0bebb5849ed8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s + +; Make sure that the 16-bit constant 0x3c00 isn't folded as 0 into +; v_bfi_b32. +define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) { +; CHECK-LABEL: bfi_fold_regression: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 +; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_pack_b32_f16 v0, v0.l, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb: + %cmp = icmp eq i32 %arg1, 0 + %call = call half @llvm.copysign.f16(half 0xH3C00, half %arg) + %select = select i1 %cmp, half 0xH3C00, half %call + %insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0 + %bitcast = bitcast <2 x half> %insertelement to i32 + ret i32 %bitcast +} + +declare half @llvm.copysign.f16(half, half) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/CodeGen/ARM/ldexp-fp128.ll b/llvm/test/CodeGen/ARM/ldexp-fp128.ll new file mode 100644 index 000000000000..93fcd39e824f --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldexp-fp128.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=armv7-unknown-linux < %s | FileCheck -check-prefix=LINUX %s + +define fp128 @testExpl(fp128 %val, i32 %a) { +; LINUX-LABEL: testExpl: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r11, lr} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: ldr r12, [sp, #16] +; LINUX-NEXT: str r12, [sp] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: pop {r11, pc} + %call = tail call fp128 @ldexpl(fp128 %val, i32 %a) + ret fp128 %call +} + +declare fp128 @ldexpl(fp128, i32) memory(none) + +define fp128 @test_ldexp_f128_i32(fp128 %val, i32 %a) { +; LINUX-LABEL: test_ldexp_f128_i32: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r11, lr} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: ldr r12, [sp, #16] +; LINUX-NEXT: str r12, [sp] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: pop {r11, pc} + %call = tail call fp128 @llvm.ldexp.f128.i32(fp128 %val, i32 %a) + ret fp128 %call +} + +define <2 x fp128> @test_ldexp_v2f128_v2i32(<2 x fp128> %val, <2 x i32> %a) { +; LINUX-LABEL: test_ldexp_v2f128_v2i32: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r4, r5, r6, lr} +; LINUX-NEXT: vpush {d8} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: mov r5, r3 +; LINUX-NEXT: add r3, sp, #40 +; LINUX-NEXT: mov r6, r2 +; LINUX-NEXT: mov r4, r0 +; LINUX-NEXT: ldm r3, {r0, r1, r2, r3} +; LINUX-NEXT: vldr d8, [sp, #56] +; LINUX-NEXT: vst1.32 {d8[1]}, [sp:32] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: ldr r12, [sp, #32] +; LINUX-NEXT: vst1.32 {d8[0]}, [sp:32] +; LINUX-NEXT: ldr lr, [sp, #36] +; LINUX-NEXT: str r0, [r4, #16] +; LINUX-NEXT: mov r0, r6 +; LINUX-NEXT: str r1, [r4, #20] +; LINUX-NEXT: mov r1, r5 +; LINUX-NEXT: str r2, [r4, #24] +; LINUX-NEXT: mov r2, r12 +; LINUX-NEXT: str r3, [r4, #28] +; LINUX-NEXT: mov r3, lr +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: stm r4, {r0, r1, r2, r3} +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: vpop {d8} +; LINUX-NEXT: pop {r4, r5, r6, pc} + %call = tail call <2 x fp128> @llvm.ldexp.v2f128.v2i32(<2 x fp128> %val, <2 x i32> %a) + ret <2 x fp128> %call +} diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll index c3656a6bdafb..9bafa10c47e3 100644 --- a/llvm/test/CodeGen/LoongArch/ldptr.ll +++ b/llvm/test/CodeGen/LoongArch/ldptr.ll @@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind { ; LA32-LABEL: ldptr_w: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_w: @@ -81,10 +80,9 @@ entry: define i64 @ldptr_d(ptr %p) nounwind { ; LA32-LABEL: ldptr_d: ; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a1, $a0, 1 -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: ld.w $a1, $a1, 4 +; LA32-NEXT: addi.w $a1, $a0, 2047 +; LA32-NEXT: ld.w $a0, $a1, 1 +; LA32-NEXT: ld.w $a1, $a1, 5 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_d: diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll index 9a806a12f7de..93f73e5cd30f 100644 --- a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll +++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll @@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB0_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: ld.w $a0, $s2, 4 -; LA32-NEXT: ld.w $a1, $s2, 0 +; LA32-NEXT: ld.w $a0, $s2, 12 +; LA32-NEXT: ld.w $a1, $s2, 8 ; LA32-NEXT: add.w $a0, $a0, $s6 ; LA32-NEXT: add.w $s3, $a1, $s3 ; LA32-NEXT: sltu $a1, $s3, $a1 @@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s3, $zero ; LA32-NEXT: move $s6, $zero ; LA32-NEXT: .LBB0_4: # %for.cond.cleanup -; LA32-NEXT: st.w $s3, $s2, 0 -; LA32-NEXT: st.w $s6, $s2, 4 +; LA32-NEXT: st.w $s3, $s2, 8 +; LA32-NEXT: st.w $s6, $s2, 12 ; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload @@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB0_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -100,7 +98,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $a0, $s1, 0 +; LA64-NEXT: ld.d $a0, $s1, 8 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: add.d $s2, $a0, $s2 ; LA64-NEXT: bnez $s0, .LBB0_2 @@ -108,7 +106,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB0_3: ; LA64-NEXT: move $s2, $zero ; LA64-NEXT: .LBB0_4: # %for.cond.cleanup -; LA64-NEXT: st.d $s2, $s1, 0 +; LA64-NEXT: st.d $s2, $s1, 8 ; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload @@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB1_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: fld.s $fa0, $s2, 0 +; LA32-NEXT: fld.s $fa0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB1_3: ; LA32-NEXT: movgr2fr.w $fs0, $zero ; LA32-NEXT: .LBB1_4: # %for.cond.cleanup -; LA32-NEXT: fst.s $fs0, $s2, 0 +; LA32-NEXT: fst.s $fs0, $s2, 16 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload @@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB1_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -220,7 +216,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: fld.s $fa0, $s1, 0 +; LA64-NEXT: fld.s $fa0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: fadd.s $fs0, $fa0, $fs0 ; LA64-NEXT: bnez $s0, .LBB1_2 @@ -228,7 +224,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB1_3: ; LA64-NEXT: movgr2fr.w $fs0, $zero ; LA64-NEXT: .LBB1_4: # %for.cond.cleanup -; LA64-NEXT: fst.s $fs0, $s1, 0 +; LA64-NEXT: fst.s $fs0, $s1, 16 ; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload @@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s0, $a3 ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 6 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB2_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: vld $vr0, $s2, 0 +; LA32-NEXT: vld $vr0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB2_3: ; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: .LBB2_4: # %for.cond.cleanup -; LA32-NEXT: vst $vr0, $s2, 0 +; LA32-NEXT: vst $vr0, $s2, 16 ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload @@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill ; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: slli.d $a0, $a0, 6 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $a1, .LBB2_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: vld $vr0, $s1, 0 +; LA64-NEXT: vld $vr0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; LA64-NEXT: vadd.w $vr1, $vr0, $vr1 @@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB2_3: ; LA64-NEXT: vrepli.b $vr0, 0 ; LA64-NEXT: .LBB2_4: # %for.cond.cleanup -; LA64-NEXT: vst $vr0, $s1, 0 +; LA64-NEXT: vst $vr0, $s1, 16 ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s0, $a3 ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 6 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 32 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB3_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: xvld $xr0, $s2, 0 +; LA32-NEXT: xvld $xr0, $s2, 32 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB3_3: ; LA32-NEXT: xvrepli.b $xr0, 0 ; LA32-NEXT: .LBB3_4: # %for.cond.cleanup -; LA32-NEXT: xvst $xr0, $s2, 0 +; LA32-NEXT: xvst $xr0, $s2, 32 ; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload @@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ; LA64-NEXT: slli.d $a0, $a0, 6 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 32 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $a1, .LBB3_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: xvld $xr0, $s1, 0 +; LA64-NEXT: xvld $xr0, $s1, 32 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload ; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1 @@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB3_3: ; LA64-NEXT: xvrepli.b $xr0, 0 ; LA64-NEXT: .LBB3_4: # %for.cond.cleanup -; LA64-NEXT: xvst $xr0, $s1, 0 +; LA64-NEXT: xvst $xr0, $s1, 32 ; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload @@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB4_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: vldrepl.b $vr0, $s2, 0 +; LA32-NEXT: vldrepl.b $vr0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB4_3: ; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: .LBB4_4: # %for.cond.cleanup -; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1 +; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1 ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload @@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB4_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: vldrepl.b $vr0, $s1, 0 +; LA64-NEXT: vldrepl.b $vr0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; LA64-NEXT: vadd.b $vr1, $vr0, $vr1 @@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB4_3: ; LA64-NEXT: vrepli.b $vr0, 0 ; LA64-NEXT: .LBB4_4: # %for.cond.cleanup -; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1 +; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1 ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB5_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: xvldrepl.d $xr0, $s2, 0 +; LA32-NEXT: xvldrepl.d $xr0, $s2, 8 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB5_3: ; LA32-NEXT: xvrepli.b $xr0, 0 ; LA32-NEXT: .LBB5_4: # %for.cond.cleanup -; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1 +; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1 ; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload @@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB5_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: xvldrepl.d $xr0, $s1, 0 +; LA64-NEXT: xvldrepl.d $xr0, $s1, 8 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload ; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1 @@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB5_3: ; LA64-NEXT: xvrepli.b $xr0, 0 ; LA64-NEXT: .LBB5_4: # %for.cond.cleanup -; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1 +; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1 ; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll index d70f9f4ba160..23b433aa1585 100644 --- a/llvm/test/CodeGen/LoongArch/stptr.ll +++ b/llvm/test/CodeGen/LoongArch/stptr.ll @@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind { ; LA32-LABEL: stptr_w: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_w: @@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind { ; LA32-LABEL: stptr_d: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a2, $a0, 4 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a2, $a0, 5 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_d: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index 50bd22bf5fd6..f4964288e354 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) { } define i64 @addmul22(i64 %a, i64 %b) { -; CHECK-LABEL: addmul22: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 22 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addmul22: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 22 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul22: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: ret %c = mul i64 %a, 22 %d = add i64 %c, %b ret i64 %d diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 7fd76262d547..d4b228828c04 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul14(i64 %a, i64 %b) { +; RV64I-LABEL: addmul14: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul14: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul14: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 14 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul18(i64 %a, i64 %b) { ; RV64I-LABEL: addmul18: ; RV64I: # %bb.0: @@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) { } define i64 @addmul22(i64 %a, i64 %b) { -; CHECK-LABEL: addmul22: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 22 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addmul22: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 22 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul22: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul22: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 22 %d = add i64 %c, %b ret i64 %d @@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul26(i64 %a, i64 %b) { +; RV64I-LABEL: addmul26: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 26 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul26: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul26: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 26 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul36(i64 %a, i64 %b) { ; RV64I-LABEL: addmul36: ; RV64I: # %bb.0: @@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul38(i64 %a, i64 %b) { +; RV64I-LABEL: addmul38: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 38 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul38: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul38: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 38 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul42(i64 %a, i64 %b) { +; RV64I-LABEL: addmul42: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 42 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul42: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul42: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 42 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul72(i64 %a, i64 %b) { ; RV64I-LABEL: addmul72: ; RV64I: # %bb.0: @@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul74(i64 %a, i64 %b) { +; RV64I-LABEL: addmul74: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 74 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul74: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul74: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 74 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul82(i64 %a, i64 %b) { +; RV64I-LABEL: addmul82: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 82 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul82: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul82: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 82 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul146(i64 %a, i64 %b) { +; RV64I-LABEL: addmul146: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 146 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul146: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul146: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 146 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @mul50(i64 %a) { ; RV64I-LABEL: mul50: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll index d8e2b2c2bf58..305ab934e44a 100644 --- a/llvm/test/CodeGen/RISCV/zicond-opts.ll +++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll @@ -263,3 +263,35 @@ define i64 @test_inv_and_eqz(i64 %f, i64 %x, i1 %cond) { %7 = and i64 %6, %f ret i64 %7 } + +define i32 @pr166596(i32 %conv.i, i1 %iszero) #0 { +; RV32ZICOND-LABEL: pr166596: +; RV32ZICOND: # %bb.0: # %entry +; RV32ZICOND-NEXT: andi a1, a1, 1 +; RV32ZICOND-NEXT: xori a0, a0, 1 +; RV32ZICOND-NEXT: zext.h a0, a0 +; RV32ZICOND-NEXT: clz a0, a0 +; RV32ZICOND-NEXT: addi a0, a0, 41 +; RV32ZICOND-NEXT: czero.nez a0, a0, a1 +; RV32ZICOND-NEXT: addi a0, a0, -9 +; RV32ZICOND-NEXT: ret +; +; RV64ZICOND-LABEL: pr166596: +; RV64ZICOND: # %bb.0: # %entry +; RV64ZICOND-NEXT: andi a1, a1, 1 +; RV64ZICOND-NEXT: xori a0, a0, 1 +; RV64ZICOND-NEXT: zext.h a0, a0 +; RV64ZICOND-NEXT: clz a0, a0 +; RV64ZICOND-NEXT: addi a0, a0, 9 +; RV64ZICOND-NEXT: czero.nez a0, a0, a1 +; RV64ZICOND-NEXT: addi a0, a0, -9 +; RV64ZICOND-NEXT: ret +entry: + %not.i = xor i32 %conv.i, 1 + %conv2.i = trunc i32 %not.i to i16 + %conv22 = zext i16 %conv2.i to i64 + %0 = call i64 @llvm.ctlz.i64(i64 %conv22, i1 false) + %cast = trunc i64 %0 to i32 + %clzg = select i1 %iszero, i32 -9, i32 %cast + ret i32 %clzg +} diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index 78e4f86ec1b9..ff0dfb371bbb 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -674,46 +674,3 @@ v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// nv bit in FLAT instructions -flat_load_ubyte v5, v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_load_ubyte a5, v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], v5 offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], a5 offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_load_ubyte v5, v[2:3], off offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_store_byte v[2:3], v5, off offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_add v[2:3], v5, off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap a1, v[2:3], a2, off glc nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap_x2 v[2:3], v[4:5], off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap_x2 v[2:3], a[4:5], off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_ubyte v5, off, s2 offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_ubyte a5, off, s2 offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_store_dword v2, v3, off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index 3af0d83fb305..c96a72ddc257 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -706,107 +706,107 @@ flat_load_short_d16_hi a5, v[2:3] offset:4095 glc flat_load_short_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_swap a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_add a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_sub a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_and a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_or a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_xor a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_inc a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_dec a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc // GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s index dc51bab65aa0..fd59a01b34a0 100644 --- a/llvm/test/MC/AMDGPU/gfx942_err.s +++ b/llvm/test/MC/AMDGPU/gfx942_err.s @@ -125,31 +125,3 @@ global_load_dword v[2:3], off lds scratch_load_dword v2, off lds // GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -// nv bit in FLAT instructions -flat_load_ubyte v5, v[2:3] offset:4095 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], v5 offset:4095 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_atomic_add_f32 v[2:3], v5 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_load_dword v2, v[2:3], off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_store_dword v[2:3], v5 off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap v0, v[2:3], v5 off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_lds_dword v2, off nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_store_dword v2, v3, off nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s index 7687c0a478bd..5cc3d2533a14 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s @@ -24,18 +24,6 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc flat_load_ubyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte v5, v[1:2] nv -// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] @@ -60,18 +48,6 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc flat_load_sbyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte v5, v[1:2] nv -// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] - flat_load_ushort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] @@ -96,18 +72,6 @@ flat_load_ushort v5, v[1:2] offset:4095 glc flat_load_ushort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] -flat_load_ushort v5, v[1:2] nv -// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] - flat_load_sshort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] @@ -132,18 +96,6 @@ flat_load_sshort v5, v[1:2] offset:4095 glc flat_load_sshort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] -flat_load_sshort v5, v[1:2] nv -// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] - flat_load_dword v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] @@ -168,18 +120,6 @@ flat_load_dword v5, v[1:2] offset:4095 glc flat_load_dword v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] -flat_load_dword v5, v[1:2] nv -// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx2 v[5:6], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] @@ -204,18 +144,6 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx2 v[5:6], v[1:2] nv -// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx3 v[5:7], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] @@ -240,18 +168,6 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx3 v[5:7], v[1:2] nv -// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx4 v[5:8], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] @@ -276,18 +192,6 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx4 v[5:8], v[1:2] nv -// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] - flat_store_byte v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] @@ -312,18 +216,6 @@ flat_store_byte v[1:2], v2 offset:4095 glc flat_store_byte v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] -flat_store_byte v[1:2], v2 nv -// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] - flat_store_byte_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] @@ -348,18 +240,6 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] -flat_store_byte_d16_hi v[1:2], v2 nv -// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] - flat_store_short v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] @@ -384,18 +264,6 @@ flat_store_short v[1:2], v2 offset:4095 glc flat_store_short v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] -flat_store_short v[1:2], v2 nv -// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] - flat_store_short_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] @@ -420,18 +288,6 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc flat_store_short_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] -flat_store_short_d16_hi v[1:2], v2 nv -// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] - flat_store_dword v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] @@ -456,18 +312,6 @@ flat_store_dword v[1:2], v2 offset:4095 glc flat_store_dword v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] -flat_store_dword v[1:2], v2 nv -// CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx2 v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] @@ -492,18 +336,6 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx2 v[1:2], v[2:3] nv -// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv -// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx3 v[1:2], v[2:4] offset:4095 // CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] @@ -528,18 +360,6 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc // CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx3 v[1:2], v[2:4] nv -// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv -// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx4 v[1:2], v[2:5] offset:4095 // CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] @@ -564,18 +384,6 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc // CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx4 v[1:2], v[2:5] nv -// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv -// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] - flat_load_ubyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] @@ -600,18 +408,6 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] - flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] @@ -636,18 +432,6 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] @@ -672,18 +456,6 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] @@ -708,18 +480,6 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] - flat_load_short_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] @@ -744,18 +504,6 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc flat_load_short_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] -flat_load_short_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] - flat_load_short_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] @@ -780,18 +528,6 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc flat_load_short_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] -flat_load_short_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] - flat_atomic_swap v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] @@ -816,18 +552,6 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc flat_atomic_swap v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_swap v[1:2], v2 nv -// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] @@ -852,18 +576,6 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_cmpswap v[1:2], v[2:3] nv -// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv -// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_add v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] @@ -888,18 +600,6 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc flat_atomic_add v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_add v[1:2], v2 nv -// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_sub v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] @@ -1497,18 +1197,6 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc global_load_ubyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05] @@ -1554,18 +1242,6 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc global_load_sbyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] - global_load_ushort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05] @@ -1611,18 +1287,6 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc global_load_ushort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05] -global_load_ushort v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] - global_load_sshort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05] @@ -1668,18 +1332,6 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc global_load_sshort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05] -global_load_sshort v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] - global_load_dword v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05] @@ -1725,18 +1377,6 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc global_load_dword v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05] -global_load_dword v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05] @@ -1782,18 +1422,6 @@ global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx2 v[5:6], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05] @@ -1839,15 +1467,6 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05] @@ -1893,15 +1512,6 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] - global_store_byte v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00] @@ -1947,18 +1557,6 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc global_store_byte v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00] -global_store_byte v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] - global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00] @@ -2004,18 +1602,6 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00] -global_store_byte_d16_hi v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] - global_store_short v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00] @@ -2061,18 +1647,6 @@ global_store_short v1, v2, s[6:7] offset:-1 glc global_store_short v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00] -global_store_short v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] - global_store_short_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00] @@ -2118,18 +1692,6 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00] -global_store_short_d16_hi v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] - global_store_dword v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00] @@ -2175,18 +1737,6 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc global_store_dword v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00] -global_store_dword v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00] @@ -2232,18 +1782,6 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx2 v1, v[2:3], s[6:7] nv -// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00] @@ -2289,18 +1827,6 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx3 v1, v[2:4], s[6:7] nv -// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00] @@ -2346,18 +1872,6 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx4 v1, v[2:5], s[6:7] nv -// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] - global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05] @@ -2403,18 +1917,6 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] - global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05] @@ -2460,18 +1962,6 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05] @@ -2517,18 +2007,6 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05] @@ -2574,18 +2052,6 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] - global_load_short_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05] @@ -2631,18 +2097,6 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc global_load_short_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05] -global_load_short_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] - global_load_short_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05] @@ -2688,18 +2142,6 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05] -global_load_short_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] - global_atomic_swap v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00] @@ -2745,18 +2187,6 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc global_atomic_swap v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00] -global_atomic_swap v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] - global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00] @@ -2802,18 +2232,6 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00] -global_atomic_cmpswap v1, v[2:3], s[6:7] nv -// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] - global_atomic_add v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00] @@ -2859,18 +2277,6 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc global_atomic_add v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00] -global_atomic_add v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] - global_atomic_sub v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00] @@ -3951,18 +3357,6 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc scratch_load_ubyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte v5, off, s2 nv -// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] @@ -4008,18 +3402,6 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc scratch_load_sbyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte v5, off, s2 nv -// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] - scratch_load_ushort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] @@ -4065,18 +3447,6 @@ scratch_load_ushort v5, off, s2 offset:-1 glc scratch_load_ushort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ushort v5, off, s2 nv -// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sshort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] @@ -4122,18 +3492,6 @@ scratch_load_sshort v5, off, s2 offset:-1 glc scratch_load_sshort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sshort v5, off, s2 nv -// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dword v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] @@ -4179,18 +3537,6 @@ scratch_load_dword v5, off, s2 offset:-1 glc scratch_load_dword v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dword v5, off, s2 nv -// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx2 v[5:6], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] @@ -4236,18 +3582,6 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx2 v[5:6], off, s2 nv -// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx3 v[5:7], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] @@ -4293,18 +3627,6 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx3 v[5:7], off, s2 nv -// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx4 v[5:8], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] @@ -4350,18 +3672,6 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx4 v[5:8], off, s2 nv -// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] - scratch_store_byte off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] @@ -4407,18 +3717,6 @@ scratch_store_byte off, v2, s3 offset:-1 glc scratch_store_byte off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] -scratch_store_byte off, v2, s3 nv -// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] - scratch_store_byte_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] @@ -4464,18 +3762,6 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] -scratch_store_byte_d16_hi off, v2, s3 nv -// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] - scratch_store_short off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] @@ -4521,18 +3807,6 @@ scratch_store_short off, v2, s3 offset:-1 glc scratch_store_short off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] -scratch_store_short off, v2, s3 nv -// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] - scratch_store_short_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] @@ -4578,18 +3852,6 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc scratch_store_short_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] -scratch_store_short_d16_hi off, v2, s3 nv -// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dword off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] @@ -4635,18 +3897,6 @@ scratch_store_dword off, v2, s3 offset:-1 glc scratch_store_dword off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dword off, v2, s3 nv -// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx2 off, v[2:3], s3 offset:-1 // CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] @@ -4692,18 +3942,6 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx2 off, v[2:3], s3 nv -// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx3 off, v[2:4], s3 offset:-1 // CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] @@ -4749,18 +3987,6 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx3 off, v[2:4], s3 nv -// CHECK: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx4 off, v[2:5], s3 offset:-1 // CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] @@ -4806,18 +4032,6 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx4 off, v[2:5], s3 nv -// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] - scratch_load_ubyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] @@ -4863,18 +4077,6 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] - scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] @@ -4920,18 +4122,6 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] @@ -4977,18 +4167,6 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] @@ -5034,18 +4212,6 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] - scratch_load_short_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] @@ -5088,18 +4254,6 @@ scratch_load_short_d16 v5, off, s2 offset:-4096 scratch_load_short_d16 v5, off, s2 offset:-1 glc // CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05] -scratch_load_short_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] - scratch_load_short_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] @@ -5148,18 +4302,6 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc scratch_load_short_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] -scratch_load_short_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] - global_load_dword v[2:3], off lds // CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt index 4c06585a4c2e..0ee659e207c9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt @@ -21,18 +21,6 @@ # CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05 @@ -54,18 +42,6 @@ # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05 @@ -87,18 +63,6 @@ # CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ushort v5, v[1:2] nv ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05 @@ -120,18 +84,6 @@ # CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sshort v5, v[1:2] nv ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dword v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05 @@ -153,18 +105,6 @@ # CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dword v5, v[1:2] nv ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05 @@ -186,18 +126,6 @@ # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05 @@ -219,18 +147,6 @@ # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05 @@ -252,18 +168,6 @@ # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_store_byte v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00 @@ -285,18 +189,6 @@ # CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_byte v[1:2], v2 nv ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00 @@ -318,18 +210,6 @@ # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_byte_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00 @@ -351,18 +231,6 @@ # CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_short v[1:2], v2 nv ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00 @@ -384,18 +252,6 @@ # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_short_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00 @@ -417,18 +273,6 @@ # CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dword v[1:2], v2 nv ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00 @@ -450,18 +294,6 @@ # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00 @@ -483,18 +315,6 @@ # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00 @@ -516,18 +336,6 @@ # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05 @@ -549,18 +357,6 @@ # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05 @@ -582,18 +378,6 @@ # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05 @@ -615,18 +399,6 @@ # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05 @@ -648,18 +420,6 @@ # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05 @@ -681,18 +441,6 @@ # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_short_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05 @@ -714,18 +462,6 @@ # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_short_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00 @@ -747,18 +483,6 @@ # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_swap v[1:2], v2 nv ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00 @@ -780,18 +504,6 @@ # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_add v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00 @@ -813,18 +525,6 @@ # CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_add v[1:2], v2 nv ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_sub v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00 @@ -1317,18 +1017,6 @@ # CHECK: global_load_ubyte v5, v[1:2], off ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05 @@ -1338,18 +1026,6 @@ # CHECK: global_load_sbyte v5, v[1:2], off ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05 @@ -1359,18 +1035,6 @@ # CHECK: global_load_ushort v5, v[1:2], off ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ushort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05 @@ -1380,18 +1044,6 @@ # CHECK: global_load_sshort v5, v[1:2], off ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sshort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05 @@ -1401,18 +1053,6 @@ # CHECK: global_load_dword v5, v[1:2], off ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dword v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05 @@ -1422,18 +1062,6 @@ # CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05 @@ -1443,18 +1071,6 @@ # CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05 @@ -1464,18 +1080,6 @@ # CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00 @@ -1485,18 +1089,6 @@ # CHECK: global_store_byte v[1:2], v2, off ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_byte v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00 @@ -1506,18 +1098,6 @@ # CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00 @@ -1527,18 +1107,6 @@ # CHECK: global_store_short v[1:2], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_short v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00 @@ -1548,18 +1116,6 @@ # CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00 @@ -1569,18 +1125,6 @@ # CHECK: global_store_dword v[1:2], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dword v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00 @@ -1590,18 +1134,6 @@ # CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00 @@ -1611,18 +1143,6 @@ # CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00 @@ -1632,18 +1152,6 @@ # CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05 @@ -1653,18 +1161,6 @@ # CHECK: global_load_ubyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05 @@ -1674,18 +1170,6 @@ # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05 @@ -1695,18 +1179,6 @@ # CHECK: global_load_sbyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05 @@ -1716,18 +1188,6 @@ # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05 @@ -1737,18 +1197,6 @@ # CHECK: global_load_short_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05 @@ -1758,18 +1206,6 @@ # CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00 @@ -1779,18 +1215,6 @@ # CHECK: global_atomic_swap v[1:2], v2, off ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00 -# CHECK: global_atomic_swap v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00 @@ -1812,18 +1236,6 @@ # CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01] 0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01 -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00 @@ -1833,18 +1245,6 @@ # CHECK: global_atomic_add v[1:2], v2, off ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00 -# CHECK: global_atomic_add v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00 @@ -2103,18 +1503,6 @@ # CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte v5, off, s2 nv ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05 @@ -2154,18 +1542,6 @@ # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte v5, off, s2 nv ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05 @@ -2205,18 +1581,6 @@ # CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ushort v5, off, s2 nv ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05 @@ -2256,18 +1620,6 @@ # CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sshort v5, off, s2 nv ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05 @@ -2307,18 +1659,6 @@ # CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dword v5, off, s2 nv ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05 @@ -2358,18 +1698,6 @@ # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05 @@ -2409,18 +1737,6 @@ # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05 @@ -2460,18 +1776,6 @@ # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00 @@ -2511,18 +1815,6 @@ # CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_byte off, v2, s3 nv ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00 @@ -2562,18 +1854,6 @@ # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00 @@ -2613,18 +1893,6 @@ # CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_short off, v2, s3 nv ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00 @@ -2664,18 +1932,6 @@ # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00 @@ -2715,18 +1971,6 @@ # CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dword off, v2, s3 nv ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00 @@ -2766,18 +2010,6 @@ # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00 @@ -2817,18 +2049,6 @@ # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00 @@ -2868,18 +2088,6 @@ # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05 @@ -2919,18 +2127,6 @@ # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05 @@ -2970,18 +2166,6 @@ # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05 @@ -3021,18 +2205,6 @@ # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05 @@ -3072,18 +2244,6 @@ # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05 @@ -3123,18 +2283,6 @@ # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_short_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05 @@ -3174,18 +2322,6 @@ # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05 - # CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] 0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll new file mode 100644 index 000000000000..fe7f43f7f4b0 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll @@ -0,0 +1,187 @@ +; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define void @wombat(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i8 %arg6) #0 { +; CHECK-LABEL: define void @wombat( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], ptr [[ARG3:%.*]], ptr [[ARG4:%.*]], ptr [[ARG5:%.*]], i8 [[ARG6:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[ARG]], 0 +; CHECK-NEXT: br i1 [[ICMP]], label %[[BB7:.*]], label %[[BB25:.*]] +; CHECK: [[BB7]]: +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[ARG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[ARG5]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[ZEXT]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND05:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND16:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT7:%.*]] = and i1 [[BOUND05]], [[BOUND16]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT7]] +; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] +; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] +; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] +; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]] +; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] +; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX15]], [[FOUND_CONFLICT18]] +; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]] +; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] +; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]] +; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[ARG6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP4]], align 1, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META3:![0-9]+]], !noalias [[META5:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD28:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META9:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP8]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD29]], [[WIDE_MASKED_LOAD28]] +; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 16 x i8> [[TMP9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP10]], ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META3]], !noalias [[META5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META11:![0-9]+]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD28]], [[WIDE_MASKED_LOAD28]] +; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD30]], [[TMP12]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP13]], ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META11]], !noalias [[META12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[BB24:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[BB7]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[BB8:.*]] +; CHECK: [[BB8]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD22:%.*]], %[[BB21:.*]] ] +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GETELEMENTPTR]], align 1 +; CHECK-NEXT: [[ICMP9:%.*]] = icmp ult i8 [[LOAD]], [[ARG6]] +; CHECK-NEXT: br i1 [[ICMP9]], label %[[BB21]], label %[[BB10:.*]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD12:%.*]] = load i8, ptr [[GETELEMENTPTR11]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG3]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD14:%.*]] = load i8, ptr [[GETELEMENTPTR13]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG4]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD16:%.*]] = load i8, ptr [[GETELEMENTPTR15]], align 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[LOAD16]], [[LOAD14]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[LOAD12]] +; CHECK-NEXT: store i8 [[ADD]], ptr [[GETELEMENTPTR11]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR17:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG2]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD18:%.*]] = load i8, ptr [[GETELEMENTPTR17]], align 1 +; CHECK-NEXT: [[MUL19:%.*]] = mul i8 [[LOAD14]], [[LOAD14]] +; CHECK-NEXT: [[ADD20:%.*]] = add i8 [[LOAD18]], [[MUL19]] +; CHECK-NEXT: store i8 [[ADD20]], ptr [[GETELEMENTPTR17]], align 1 +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: [[ADD22]] = add nuw nsw i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP23:%.*]] = icmp eq i64 [[ADD22]], [[ZEXT]] +; CHECK-NEXT: br i1 [[ICMP23]], label %[[BB24]], label %[[BB8]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[BB24]]: +; CHECK-NEXT: br label %[[BB25]] +; CHECK: [[BB25]]: +; CHECK-NEXT: ret void +; +bb: + %icmp = icmp sgt i32 %arg, 0 + br i1 %icmp, label %bb7, label %bb25 + +bb7: ; preds = %bb + %zext = zext nneg i32 %arg to i64 + br label %bb8 + +bb8: ; preds = %bb21, %bb7 + %phi = phi i64 [ 0, %bb7 ], [ %add22, %bb21 ] + %getelementptr = getelementptr inbounds nuw i8, ptr %arg5, i64 %phi + %load = load i8, ptr %getelementptr, align 1 + %icmp9 = icmp ult i8 %load, %arg6 + br i1 %icmp9, label %bb21, label %bb10 + +bb10: ; preds = %bb8 + %getelementptr11 = getelementptr inbounds nuw i8, ptr %arg1, i64 %phi + %load12 = load i8, ptr %getelementptr11, align 1 + %getelementptr13 = getelementptr inbounds nuw i8, ptr %arg3, i64 %phi + %load14 = load i8, ptr %getelementptr13, align 1 + %getelementptr15 = getelementptr inbounds nuw i8, ptr %arg4, i64 %phi + %load16 = load i8, ptr %getelementptr15, align 1 + %mul = mul i8 %load16, %load14 + %add = add i8 %mul, %load12 + store i8 %add, ptr %getelementptr11, align 1 + %getelementptr17 = getelementptr inbounds nuw i8, ptr %arg2, i64 %phi + %load18 = load i8, ptr %getelementptr17, align 1 + %mul19 = mul i8 %load14, %load14 + %add20 = add i8 %load18, %mul19 + store i8 %add20, ptr %getelementptr17, align 1 + br label %bb21 + +bb21: ; preds = %bb10, %bb8 + %add22 = add nuw nsw i64 %phi, 1 + %icmp23 = icmp eq i64 %add22, %zext + br i1 %icmp23, label %bb24, label %bb8, !llvm.loop !0 + +bb24: ; preds = %bb21 + br label %bb25 + +bb25: ; preds = %bb24, %bb + ret void +} + +attributes #0 = { uwtable vscale_range(1,16) "aarch64_pstate_sm_body" "target-features"="+fp-armv8,+neon,+sme,+v8a,-fmv" } + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.vectorize.width", i32 16} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} +; CHECK: [[META2]] = distinct !{[[META2]], !"LVerDomain"} +; CHECK: [[META3]] = !{[[META4:![0-9]+]]} +; CHECK: [[META4]] = distinct !{[[META4]], [[META2]]} +; CHECK: [[META5]] = !{[[META6:![0-9]+]], [[META1]], [[META7:![0-9]+]], [[META8:![0-9]+]]} +; CHECK: [[META6]] = distinct !{[[META6]], [[META2]]} +; CHECK: [[META7]] = distinct !{[[META7]], [[META2]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META2]]} +; CHECK: [[META9]] = !{[[META7]]} +; CHECK: [[META10]] = !{[[META8]]} +; CHECK: [[META11]] = !{[[META6]]} +; CHECK: [[META12]] = !{[[META1]], [[META7]], [[META8]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META14:![0-9]+]], [[META15:![0-9]+]], [[META16:![0-9]+]]} +; CHECK: [[META14]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META15]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META16]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META15]]} +;. diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index cd9512f6eae8..b1f20a73c3b2 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1,8 +1,3 @@ -Analysis/LoopAccessAnalysis/memcheck-ni.ll -Analysis/MemorySSA/pr116227.ll -Analysis/MemorySSA/pr43641.ll -Analysis/MemorySSA/pr46574.ll -Analysis/MemorySSA/update-remove-dead-blocks.ll Bitcode/fcmp-fast.ll Bitcode/flags.ll CodeGen/AArch64/cgdata-merge-local.ll @@ -26,27 +21,12 @@ CodeGen/X86/nocfivalue.ll DebugInfo/AArch64/ir-outliner.ll DebugInfo/assignment-tracking/X86/hotcoldsplit.ll DebugInfo/Generic/block-asan.ll -DebugInfo/KeyInstructions/Generic/loop-unswitch.ll DebugInfo/X86/asan_debug_info.ll LTO/X86/diagnostic-handler-remarks-with-hotness.ll Other/optimization-remarks-auto.ll Other/X86/debugcounter-partiallyinlinelibcalls.ll -Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll -Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll -Transforms/AtomicExpand/AArch64/pcsections.ll Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll -Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll -Transforms/AtomicExpand/ARM/atomicrmw-fp.ll -Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll -Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll -Transforms/AtomicExpand/Mips/atomicrmw-fp.ll -Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll -Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll -Transforms/AtomicExpand/SPARC/libcalls.ll Transforms/AtomicExpand/SPARC/partword.ll -Transforms/AtomicExpand/X86/expand-atomic-rmw-fp.ll -Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll -Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll Transforms/Attributor/align.ll Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -265,14 +245,13 @@ Transforms/InstCombine/and2.ll Transforms/InstCombine/and-fcmp.ll Transforms/InstCombine/and.ll Transforms/InstCombine/and-or-icmps.ll -Transforms/InstCombine/and-or-implied-cond-not.ll Transforms/InstCombine/apint-div1.ll Transforms/InstCombine/apint-div2.ll Transforms/InstCombine/ashr-demand.ll Transforms/InstCombine/atomic.ll Transforms/InstCombine/binop-cast.ll -Transforms/InstCombine/binop-select.ll Transforms/InstCombine/binop-select-cast-of-select-cond.ll +Transforms/InstCombine/binop-select.ll Transforms/InstCombine/bit-checks.ll Transforms/InstCombine/bitreverse.ll Transforms/InstCombine/branch.ll @@ -298,7 +277,6 @@ Transforms/InstCombine/fold-ctpop-of-not.ll Transforms/InstCombine/fold-ext-eq-c-with-op.ll Transforms/InstCombine/free-inversion.ll Transforms/InstCombine/icmp-and-lowbit-mask.ll -Transforms/InstCombine/icmp-equality-test.ll Transforms/InstCombine/icmp.ll Transforms/InstCombine/icmp-mul-and.ll Transforms/InstCombine/icmp-of-and-x.ll @@ -307,7 +285,6 @@ Transforms/InstCombine/icmp-select-implies-common-op.ll Transforms/InstCombine/icmp-select.ll Transforms/InstCombine/icmp-with-selects.ll Transforms/InstCombine/intrinsic-select.ll -Transforms/InstCombine/known-never-nan.ll Transforms/InstCombine/ldexp-ext.ll Transforms/InstCombine/ldexp.ll Transforms/InstCombine/load-bitcast-select.ll @@ -347,13 +324,11 @@ Transforms/InstCombine/or.ll Transforms/InstCombine/pow-1.ll Transforms/InstCombine/pow-3.ll Transforms/InstCombine/pow-sqrt.ll -Transforms/InstCombine/pr24354.ll Transforms/InstCombine/pull-conditional-binop-through-shift.ll Transforms/InstCombine/rem.ll Transforms/InstCombine/sdiv-canonicalize.ll Transforms/InstCombine/sdiv-guard.ll Transforms/InstCombine/select-and-or.ll -Transforms/InstCombine/select-bitext.ll Transforms/InstCombine/select-cmp-br.ll Transforms/InstCombine/select-cmp.ll Transforms/InstCombine/select-factorize.ll @@ -362,7 +337,6 @@ Transforms/InstCombine/select.ll Transforms/InstCombine/select-min-max.ll Transforms/InstCombine/select-of-symmetric-selects.ll Transforms/InstCombine/select-select.ll -Transforms/InstCombine/select-with-extreme-eq-cond.ll Transforms/InstCombine/shift.ll Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll Transforms/InstCombine/shuffle-select-narrow.ll @@ -512,66 +486,12 @@ Transforms/LoopBoundSplit/bug51866.ll Transforms/LoopBoundSplit/bug-loop-bound-split-phi-in-exit-block.ll Transforms/LoopBoundSplit/loop-bound-split.ll Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll -Transforms/LoopDistribute/basic-with-memchecks.ll -Transforms/LoopDistribute/bounds-expansion-bug.ll -Transforms/LoopDistribute/cross-partition-access.ll -Transforms/LoopDistribute/debug-loc.ll -Transforms/LoopDistribute/debug-print.ll -Transforms/LoopDistribute/diagnostics.ll -Transforms/LoopDistribute/followup.ll -Transforms/LoopDistribute/laa-invalidation.ll -Transforms/LoopDistribute/outside-use.ll -Transforms/LoopDistribute/pointer-phi-in-loop.ll -Transforms/LoopDistribute/scev-inserted-runtime-check.ll -Transforms/LoopDistribute/symbolic-stride.ll -Transforms/LoopFlatten/loop-flatten-version.ll Transforms/LoopIdiom/AArch64/byte-compare-index.ll Transforms/LoopIdiom/AArch64/find-first-byte.ll Transforms/LoopIdiom/RISCV/byte-compare-index.ll -Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll -Transforms/LoopIdiom/X86/left-shift-until-bittest.ll -Transforms/LoopIdiom/X86/left-shift-until-zero.ll -Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll -Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll -Transforms/LoopLoadElim/forward.ll -Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll -Transforms/LoopLoadElim/memcheck.ll -Transforms/LoopLoadElim/pr47457.ll -Transforms/LoopLoadElim/symbolic-stride.ll -Transforms/LoopLoadElim/unknown-stride-known-dep.ll -Transforms/LoopLoadElim/versioning-scev-invalidation.ll -Transforms/LoopPredication/preserve-bpi.ll -Transforms/LoopSimplifyCFG/constant-fold-branch.ll -Transforms/LoopSimplifyCFG/handle_dead_exits.ll -Transforms/LoopSimplifyCFG/invalidate-scev-dispositions-2.ll -Transforms/LoopSimplifyCFG/invalidate-scev-dispositions.ll -Transforms/LoopSimplifyCFG/lcssa.ll -Transforms/LoopSimplifyCFG/live_block_marking.ll -Transforms/LoopSimplifyCFG/mssa_update.ll -Transforms/LoopSimplifyCFG/pr117537.ll -Transforms/LoopSimplifyCFG/update_parents.ll Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll Transforms/LoopUnroll/peel-last-iteration-with-guards.ll Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll -Transforms/LoopUnroll/runtime-loop-multiple-exits.ll -Transforms/LoopVersioning/add-phi-update-users.ll -Transforms/LoopVersioning/basic.ll -Transforms/LoopVersioning/bound-check-partially-known.ll -Transforms/LoopVersioning/crash-36998.ll -Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll -Transforms/LoopVersioning/incorrect-phi.ll -Transforms/LoopVersioning/invalidate-laa-after-versioning.ll -Transforms/LoopVersioning/lcssa.ll -Transforms/LoopVersioningLICM/load-from-unknown-address.ll -Transforms/LoopVersioningLICM/loopversioningLICM1.ll -Transforms/LoopVersioningLICM/loopversioningLICM2.ll -Transforms/LoopVersioningLICM/metadata.ll -Transforms/LoopVersioning/loop-invariant-bound.ll -Transforms/LoopVersioning/noalias.ll -Transforms/LoopVersioning/noalias-version-twice.ll -Transforms/LoopVersioning/single-iteration.ll -Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll -Transforms/LoopVersioning/wrapping-pointer-versioning.ll Transforms/LowerAtomic/atomic-load.ll Transforms/LowerAtomic/atomic-swap.ll Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll @@ -740,27 +660,6 @@ Transforms/Scalarizer/scatter-order.ll Transforms/Scalarizer/variable-extractelement.ll Transforms/Scalarizer/variable-insertelement.ll Transforms/Scalarizer/vector-of-pointer-to-vector.ll -Transforms/SimpleLoopUnswitch/debuginfo.ll -Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll -Transforms/SimpleLoopUnswitch/endless-unswitch.ll -Transforms/SimpleLoopUnswitch/guards.ll -Transforms/SimpleLoopUnswitch/inject-invariant-conditions-exponential.ll -Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll -Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll -Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll -Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll -Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll -Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll -Transforms/SimpleLoopUnswitch/partial-unswitch.ll -Transforms/SimpleLoopUnswitch/partial-unswitch-loop-and-block-dispositions.ll -Transforms/SimpleLoopUnswitch/partial-unswitch-mssa-threshold.ll -Transforms/SimpleLoopUnswitch/partial-unswitch-update-memoryssa.ll -Transforms/SimpleLoopUnswitch/pr138509.ll -Transforms/SimpleLoopUnswitch/pr59546.ll -Transforms/SimpleLoopUnswitch/pr60736.ll -Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll -Transforms/SimpleLoopUnswitch/trivial-unswitch.ll -Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll Transforms/StructurizeCFG/callbr.ll Transforms/StructurizeCFG/hoist-zerocost.ll diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index a920d57c7cd2..8d20b496cd3a 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -835,6 +835,12 @@ each pass, the generator produces a `registerPassName` where generates a `registerGroupPasses`, where `Group` is the tag provided via the `-name` input parameter, that registers all of the passes present. +These declarations can be enabled for the whole group of passes by +defining the `GEN_PASS_REGISTRATION` macro, or on a per-pass basis by +defining `GEN_PASS_REGISTRATION_PASSNAME` where `PASSNAME` is the +uppercase version of the name of the pass (similar to pass def and +decls). + ```c++ // Tablegen options: -gen-pass-decls -name="Example" diff --git a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp index 1e56810ff7aa..7420412f0936 100644 --- a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp +++ b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp @@ -328,7 +328,6 @@ static bool traverseRegionGraph(Region *begin, << nextRegion->getRegionNumber() << ", returning true"; return true; } - llvm::dbgs() << "Region: " << nextRegion << "\n"; if (!nextRegion->getParentOp()) { llvm::errs() << "Region " << *nextRegion << " has no parent op\n"; return false; diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp index f7134ce02b72..f4b8eb43b49b 100644 --- a/mlir/tools/mlir-tblgen/PassGen.cpp +++ b/mlir/tools/mlir-tblgen/PassGen.cpp @@ -57,19 +57,23 @@ const char *const passRegistrationCode = R"( //===----------------------------------------------------------------------===// // {0} Registration //===----------------------------------------------------------------------===// +#ifdef {1} inline void register{0}() {{ ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {{ - return {1}; + return {2}; }); } // Old registration code, kept for temporary backwards compatibility. inline void register{0}Pass() {{ ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {{ - return {1}; + return {2}; }); } + +#undef {1} +#endif // {1} )"; /// The code snippet used to generate a function to register all passes in a @@ -116,6 +120,10 @@ static std::string getPassDeclVarName(const Pass &pass) { return "GEN_PASS_DECL_" + pass.getDef()->getName().upper(); } +static std::string getPassRegistrationVarName(const Pass &pass) { + return "GEN_PASS_REGISTRATION_" + pass.getDef()->getName().upper(); +} + /// Emit the code to be included in the public header of the pass. static void emitPassDecls(const Pass &pass, raw_ostream &os) { StringRef passName = pass.getDef()->getName(); @@ -143,18 +151,25 @@ static void emitPassDecls(const Pass &pass, raw_ostream &os) { /// PassRegistry. static void emitRegistrations(llvm::ArrayRef<Pass> passes, raw_ostream &os) { os << "#ifdef GEN_PASS_REGISTRATION\n"; + os << "// Generate registrations for all passes.\n"; + for (const Pass &pass : passes) + os << "#define " << getPassRegistrationVarName(pass) << "\n"; + os << "#endif // GEN_PASS_REGISTRATION\n"; for (const Pass &pass : passes) { + std::string passName = pass.getDef()->getName().str(); + std::string passEnableVarName = getPassRegistrationVarName(pass); + std::string constructorCall; if (StringRef constructor = pass.getConstructor(); !constructor.empty()) constructorCall = constructor.str(); else - constructorCall = formatv("create{0}()", pass.getDef()->getName()).str(); - - os << formatv(passRegistrationCode, pass.getDef()->getName(), + constructorCall = formatv("create{0}()", passName).str(); + os << formatv(passRegistrationCode, passName, passEnableVarName, constructorCall); } + os << "#ifdef GEN_PASS_REGISTRATION\n"; os << formatv(passGroupRegistrationCode, groupName); for (const Pass &pass : passes) |
