summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Grossman <aidengrossman@google.com>2025-11-06 08:06:41 +0000
committerAiden Grossman <aidengrossman@google.com>2025-11-06 08:06:41 +0000
commit0b8d9fc2999f630c32ddeb8c8376bf24a4106156 (patch)
treea5e9e2efa16d001e7f60b13559081e050fb85be4
parent1b232e544b601ebe6a07c38bb081ec06d9ffa15c (diff)
parent9f5811ec6bd5e9f99dd22c4a06e6e984cb15ae4b (diff)
Created using spr 1.3.7 [skip ci]
-rw-r--r--clang/include/clang/Basic/LangOptions.def2
-rw-r--r--clang/test/ClangScanDeps/strip-codegen-args.m6
-rw-r--r--flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp8
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf4
-rw-r--r--libcxx/utils/ci/buildkite-pipeline.yml2
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h19
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp33
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td12
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td140
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp29
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp149
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.h6
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp9
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp81
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll556
-rw-r--r--llvm/test/CodeGen/AMDGPU/true16-fold.mir35
-rw-r--r--llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll29
-rw-r--r--llvm/test/CodeGen/ARM/ldexp-fp128.ll66
-rw-r--r--llvm/test/CodeGen/LoongArch/ldptr.ll10
-rw-r--r--llvm/test/CodeGen/LoongArch/sink-fold-addi.ll88
-rw-r--r--llvm/test/CodeGen/LoongArch/stptr.ll8
-rw-r--r--llvm/test/CodeGen/RISCV/rv64xtheadba.ll19
-rw-r--r--llvm/test/CodeGen/RISCV/rv64zba.ll209
-rw-r--r--llvm/test/CodeGen/RISCV/zicond-opts.ll32
-rw-r--r--llvm/test/MC/AMDGPU/gfx90a_err.s43
-rw-r--r--llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s52
-rw-r--r--llvm/test/MC/AMDGPU/gfx942_err.s28
-rw-r--r--llvm/test/MC/AMDGPU/gfx9_asm_flat.s858
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt864
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll187
-rw-r--r--llvm/utils/profcheck-xfail.txt103
-rw-r--r--mlir/docs/PassManagement.md6
-rw-r--r--mlir/lib/Interfaces/ControlFlowInterfaces.cpp1
-rw-r--r--mlir/tools/mlir-tblgen/PassGen.cpp25
39 files changed, 1305 insertions, 2442 deletions
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index d3cca82b4bdf..40fc66ea12e3 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -454,7 +454,7 @@ LANGOPT(BranchTargetEnforcement, 1, 0, NotCompatible, "Branch-target enforcement
LANGOPT(BranchProtectionPAuthLR, 1, 0, NotCompatible, "Use PC as a diversifier using PAuthLR NOP instructions.")
LANGOPT(GuardedControlStack, 1, 0, NotCompatible, "Guarded control stack enabled")
-LANGOPT(SpeculativeLoadHardening, 1, 0, NotCompatible, "Speculative load hardening enabled")
+LANGOPT(SpeculativeLoadHardening, 1, 0, Benign, "Speculative load hardening enabled")
LANGOPT(RelativeCXXABIVTables, 1, 0, NotCompatible,
"Use an ABI-incompatible v-table layout that uses relative references")
diff --git a/clang/test/ClangScanDeps/strip-codegen-args.m b/clang/test/ClangScanDeps/strip-codegen-args.m
index 71171f498338..f2cec6281f7d 100644
--- a/clang/test/ClangScanDeps/strip-codegen-args.m
+++ b/clang/test/ClangScanDeps/strip-codegen-args.m
@@ -16,6 +16,7 @@
// CHECK-NOT: "-flto"
// CHECK-NOT: "-fno-autolink"
// CHECK-NOT: "-mrelax-relocations=no"
+// CHECK-NOT: "-mspeculative-load-hardening"
// CHECK: ]
// CHECK: "name": "A"
// CHECK: }
@@ -39,6 +40,11 @@
"command": "clang -Imodules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -O2 -flto=full -fsyntax-only DIR/t3.m",
"file": "DIR/t2.m"
}
+ {
+ "directory": "DIR",
+ "command": "clang -Imodules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-modules -O2 -mspeculative-load-hardening -fsyntax-only DIR/t3.m",
+ "file": "DIR/t3.m"
+ }
]
//--- modules/A/module.modulemap
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 6312e61f5e62..4c0d26642863 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1122,13 +1122,7 @@ CUDAIntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType,
void CUDAIntrinsicLibrary::genSyncWarp(
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 1);
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.bar.warp.sync";
- mlir::Value mask = fir::getBase(args[0]);
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {mask.getType()}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> argsList{mask};
- fir::CallOp::create(builder, loc, funcOp, argsList);
+ mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
}
// THIS_GRID
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 2d2c801b48f4..9f8f74a0c7b5 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -105,7 +105,7 @@ end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: nvvm.barrier0
-! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
+! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()
@@ -219,7 +219,7 @@ end
! CHECK-LABEL: func.func @_QPhost1()
! CHECK: cuf.kernel
! CHECK: nvvm.barrier0
-! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
+! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
! CHECK: fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32
diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index ca83af9824b8..2ac69c38ebff 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -103,7 +103,6 @@ steps:
queue: libcxx-builders
os: aix
<<: *common
- skip: "https://github.com/llvm/llvm-project/issues/162516"
- label: AIX (64-bit)
command: libcxx/utils/ci/run-buildbot aix
@@ -115,7 +114,6 @@ steps:
queue: libcxx-builders
os: aix
<<: *common
- skip: "https://github.com/llvm/llvm-project/issues/162516"
- group: ':freebsd: FreeBSD'
steps:
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h
index 7cc78d4be279..fc41641fd5cf 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h
@@ -211,6 +211,21 @@ public:
return FilteredView(Libraries.begin(), Libraries.end(), S, K);
}
+ using LibraryFilterFn = std::function<bool(const LibraryInfo &)>;
+ void getLibraries(LibState S, PathType K,
+ std::vector<std::shared_ptr<LibraryInfo>> &Outs,
+ LibraryFilterFn Filter = nullptr) const {
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
+ for (const auto &[_, Entry] : Libraries) {
+ const auto &Info = *Entry;
+ if (Info.getKind() != K || Info.getState() != S)
+ continue;
+ if (Filter && !Filter(Info))
+ continue;
+ Outs.push_back(Entry);
+ }
+ }
+
void forEachLibrary(const LibraryVisitor &visitor) const {
std::unique_lock<std::shared_mutex> Lock(Mtx);
for (const auto &[_, entry] : Libraries) {
@@ -220,14 +235,14 @@ public:
}
bool isLoaded(StringRef Path) const {
- std::unique_lock<std::shared_mutex> Lock(Mtx);
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
if (auto It = Libraries.find(Path.str()); It != Libraries.end())
return It->second->getState() == LibState::Loaded;
return false;
}
bool isQueried(StringRef Path) const {
- std::unique_lock<std::shared_mutex> Lock(Mtx);
+ std::shared_lock<std::shared_mutex> Lock(Mtx);
if (auto It = Libraries.find(Path.str()); It != Libraries.end())
return It->second->getState() == LibState::Queried;
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 46c4bb85a742..816b7ba92bd8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4046,6 +4046,8 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
m_ConstInt(AndMask)))) {
// Type Legalisation Pattern:
// (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
+ if (BitWidthDiff.getZExtValue() >= BitWidth)
+ return SDValue();
unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
return SDValue();
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
index 35da82a10306..7e1d5285463c 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
@@ -184,9 +184,9 @@ class SymbolSearchContext {
public:
SymbolSearchContext(SymbolQuery &Q) : Q(Q) {}
- bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); }
+ bool hasSearched(const LibraryInfo *Lib) const { return Searched.count(Lib); }
- void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); }
+ void markSearched(const LibraryInfo *Lib) { Searched.insert(Lib); }
inline bool allResolved() const { return Q.allResolved(); }
@@ -194,7 +194,7 @@ public:
private:
SymbolQuery &Q;
- DenseSet<LibraryInfo *> Searched;
+ DenseSet<const LibraryInfo *> Searched;
};
void LibraryResolver::resolveSymbolsInLibrary(
@@ -226,19 +226,18 @@ void LibraryResolver::resolveSymbolsInLibrary(
return EnumerateResult::Continue;
},
Opts);
+ };
+ if (!Lib.hasFilter()) {
+ LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
+ << "\n";);
+ enumerateSymbolsIfNeeded();
if (DiscoveredSymbols.empty()) {
LLVM_DEBUG(dbgs() << " No symbols and remove library : "
<< Lib.getFullPath() << "\n";);
LibMgr.removeLibrary(Lib.getFullPath());
return;
}
- };
-
- if (!Lib.hasFilter()) {
- LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
- << "\n";);
- enumerateSymbolsIfNeeded();
SmallVector<StringRef> SymbolVec;
SymbolVec.reserve(DiscoveredSymbols.size());
for (const auto &KV : DiscoveredSymbols)
@@ -288,11 +287,15 @@ void LibraryResolver::searchSymbolsInLibraries(
SymbolSearchContext Ctx(Q);
while (!Ctx.allResolved()) {
+ std::vector<std::shared_ptr<LibraryInfo>> Libs;
+ LibMgr.getLibraries(S, K, Libs, [&](const LibraryInfo &Lib) {
+ return !Ctx.hasSearched(&Lib);
+ });
- for (auto &Lib : LibMgr.getView(S, K)) {
- if (Ctx.hasSearched(Lib.get()))
- continue;
+ if (Libs.empty() && !scanLibrariesIfNeeded(K, scanBatchSize))
+ break; // no more new libs to scan
+ for (auto &Lib : Libs) {
// can use Async here?
resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options);
Ctx.markSearched(Lib.get());
@@ -300,12 +303,6 @@ void LibraryResolver::searchSymbolsInLibraries(
if (Ctx.allResolved())
return;
}
-
- if (Ctx.allResolved())
- return;
-
- if (!scanLibrariesIfNeeded(K, scanBatchSize))
- break; // no more new libs to scan
}
};
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
index d93f68622fcc..32f6dbefb848 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
@@ -50,7 +50,7 @@ void handleError(Error Err, StringRef context = "") {
}
bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) {
- Triple HostTriple(sys::getDefaultTargetTriple());
+ Triple HostTriple(sys::getProcessTriple());
Triple ObjTriple = Obj.makeTriple();
LLVM_DEBUG({
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e3b0a1bec53e..e62fdb678684 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -312,7 +312,7 @@ public:
}
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
- if (!ST->hasSVE())
+ if (!ST->isSVEorStreamingSVEAvailable())
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 4fe194c813c4..54d94b1f8682 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2366,18 +2366,6 @@ def isGFX8GFX9NotGFX90A :
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
-// Pre-90A GFX9s allow the NV bit in FLAT instructions.
-def isNVAllowedInFlat :
- Predicate<"!Subtarget->hasGFX90AInsts() &&"
- " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
- AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
-
-// GFX8 or GFX90A+ do not allow the NV bit in FLAT instructions.
-def isNVNotAllowedInFlat :
- Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
- " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
- AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
-
def isGFX90AOnly :
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2808c44c59c1..09338c533fdf 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1602,11 +1602,6 @@ public:
bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
- bool isFlatInstAndNVAllowed(const MCInst &Inst) const {
- uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
- return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A();
- }
-
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -5375,7 +5370,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
Error(S, "scale_offset is not supported on this GPU");
}
- if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) {
+ if (CPol & CPol::NV) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7150,13 +7145,6 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
unsigned Enabled = 0, Seen = 0;
for (;;) {
SMLoc S = getLoc();
-
- if (isGFX9() && trySkipId("nv")) {
- Enabled |= CPol::NV;
- Seen |= CPol::NV;
- continue;
- }
-
bool Disabling;
unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
if (!CPol)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 6ef224148e44..8ea64d17417f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
bits<7> saddr;
bits<10> vdst;
- bits<6> cpol;
+ bits<5> cpol;
// Only valid on gfx9
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2693,52 +2693,29 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
-class FLAT_Real_vi_ex_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
- FLAT_Real_vi <op, ps, has_sccb> {
- let AssemblerPredicate = isNVNotAllowedInFlat;
-}
-
-class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
- FLAT_Real_vi <op, ps, has_sccb> {
- let AssemblerPredicate = isNVAllowedInFlat;
- let Subtarget = SIEncodingFamily.GFX9;
- let DecoderNamespace = "GFX9";
- let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
-}
-
-multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
- def _vi: FLAT_Real_vi_ex_gfx9<op, ps, has_sccb>;
- def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
-}
-
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
- defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
-}
-
-multiclass FLAT_Real_AllAddr_vi_ex_gfx9<bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
- def _SADDR_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
let AssemblerPredicate = isGFX940Plus;
- let DecoderNamespace = "GFX940";
+ let DecoderNamespace = "GFX9";
let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
- let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX940;
+ let OtherPredicates = [isGFX8GFX9NotGFX940];
+ }
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
+ let DecoderNamespace = "GFX9";
}
-
- defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
-
let AssemblerPredicate = isGFX940Plus in {
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2751,11 +2728,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
- defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
}
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
- defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+ def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
}
}
@@ -2771,66 +2748,47 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
-defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
-defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
-defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
-defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
-defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
-defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
-defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
-defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
-
-defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
-defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
-defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
-defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
-defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
-defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
-defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
-defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
-
-defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
-defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
-defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
-defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
-defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
-defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
+def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
+def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
+def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
+def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
+def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
+def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
+def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
+def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
multiclass FLAT_Real_Atomics_vi <bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
defvar ps = !cast<FLAT_Pseudo>(NAME);
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
- defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
-}
-
-multiclass FLAT_Real_Atomics_vi_ex_gfx9 <bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
- def _RTN_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
-
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+ def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
}
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
FLAT_Real_AllAddr_vi<op, has_sccb> {
- defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
- defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
-
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
- def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
-}
-
-multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9<bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
- FLAT_Real_AllAddr_vi_ex_gfx9<op, has_sccb> {
- def _RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
- def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+ def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+ def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
- def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+ def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
}
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
@@ -2992,10 +2950,10 @@ let AssemblerPredicate = isGFX940Plus in {
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>;
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>;
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>;
- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>;
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End AssemblerPredicate = isGFX940Plus
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 3e6f35dbf5e5..703ec0a4befa 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -186,12 +186,8 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
- if (Imm & ~CPol::ALL_pregfx12) {
- if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
- O << " nv";
- else
- O << " /* unexpected cache policy bit */";
- }
+ if (Imm & ~CPol::ALL_pregfx12)
+ O << " /* unexpected cache policy bit */";
}
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6616b3041059..84984a0871da 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1129,40 +1129,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
return false;
- MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
return true;
}
- // TODO: Verify the following code handles subregisters correctly.
- // TODO: Handle extract of global reference
- if (UseOp.getSubReg())
- return false;
-
- if (!OpToFold.isReg())
- return false;
-
- Register UseReg = OpToFold.getReg();
- if (!UseReg.isVirtual())
- return false;
-
- // Maybe it is just a COPY of an immediate itself.
-
- // FIXME: Remove this handling. There is already special case folding of
- // immediate into copy in foldOperand. This is looking for the def of the
- // value the folding started from in the first place.
- MachineInstr *Def = MRI->getVRegDef(UseReg);
- if (Def && TII->isFoldableCopy(*Def)) {
- MachineOperand &DefOp = Def->getOperand(1);
- if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
- FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
- OpToFold.DefSubReg);
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
- return true;
- }
- }
-
return false;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index c89212dae72d..90a4723c9a3e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+ Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ enum MemIOffsetType {
+ Imm14Shift2,
+ Imm12,
+ Imm11Shift1,
+ Imm10Shift2,
+ Imm9Shift3,
+ Imm8,
+ Imm8Shift1,
+ Imm8Shift2,
+ Imm8Shift3
+ };
+
+ MemIOffsetType OT;
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::LDPTR_W:
+ case LoongArch::LDPTR_D:
+ case LoongArch::STPTR_W:
+ case LoongArch::STPTR_D:
+ OT = Imm14Shift2;
+ break;
+ case LoongArch::LD_B:
+ case LoongArch::LD_H:
+ case LoongArch::LD_W:
+ case LoongArch::LD_D:
+ case LoongArch::LD_BU:
+ case LoongArch::LD_HU:
+ case LoongArch::LD_WU:
+ case LoongArch::ST_B:
+ case LoongArch::ST_H:
+ case LoongArch::ST_W:
+ case LoongArch::ST_D:
+ case LoongArch::FLD_S:
+ case LoongArch::FLD_D:
+ case LoongArch::FST_S:
+ case LoongArch::FST_D:
+ case LoongArch::VLD:
+ case LoongArch::VST:
+ case LoongArch::XVLD:
+ case LoongArch::XVST:
+ case LoongArch::VLDREPL_B:
+ case LoongArch::XVLDREPL_B:
+ OT = Imm12;
+ break;
+ case LoongArch::VLDREPL_H:
+ case LoongArch::XVLDREPL_H:
+ OT = Imm11Shift1;
+ break;
+ case LoongArch::VLDREPL_W:
+ case LoongArch::XVLDREPL_W:
+ OT = Imm10Shift2;
+ break;
+ case LoongArch::VLDREPL_D:
+ case LoongArch::XVLDREPL_D:
+ OT = Imm9Shift3;
+ break;
+ case LoongArch::VSTELM_B:
+ case LoongArch::XVSTELM_B:
+ OT = Imm8;
+ break;
+ case LoongArch::VSTELM_H:
+ case LoongArch::XVSTELM_H:
+ OT = Imm8Shift1;
+ break;
+ case LoongArch::VSTELM_W:
+ case LoongArch::XVSTELM_W:
+ OT = Imm8Shift2;
+ break;
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_D:
+ OT = Imm8Shift3;
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg)
+ return false;
+
+ if ((AddrI.getOpcode() != LoongArch::ADDI_W &&
+ AddrI.getOpcode() != LoongArch::ADDI_D) ||
+ !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm())
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm();
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp;
+ if (!STI.is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) &&
+ !(OT == Imm12 && isInt<12>(NewOffset)) &&
+ !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
+ !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
+ !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
+ !(OT == Imm8 && isInt<8>(NewOffset)) &&
+ !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
+ !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
+ !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset)))
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
+
+MachineInstr *
+LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ unsigned MemIOp = MemI.getOpcode();
+ switch (MemIOp) {
+ default:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ case LoongArch::VSTELM_B:
+ case LoongArch::VSTELM_H:
+ case LoongArch::VSTELM_W:
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_B:
+ case LoongArch::XVSTELM_H:
+ case LoongArch::XVSTELM_W:
+ case LoongArch::XVSTELM_D:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(), 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .addImm(MemI.getOperand(3).getImm())
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ }
+}
+
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f25958a32bec..f69a558bdeca 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -93,6 +93,12 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override; // On success fills AM (base reg + displacement) and returns true.
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override; // Builds MemI's opcode again, addressed by AM.
+
protected:
const LoongArchSubtarget &STI;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d83792..92a9388e5cb7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
cl::desc("Enable the merge base offset pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("loongarch-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden); // On by default; passed to setEnableSinkAndFold() in LoongArchPassConfig.
+
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -146,7 +151,9 @@ namespace {
class LoongArchPassConfig : public TargetPassConfig {
public:
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ setEnableSinkAndFold(EnableSinkFold);
+ }
LoongArchTargetMachine &getLoongArchTargetMachine() const {
return getTM<LoongArchTargetMachine>();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c3f100e3197b..995ae75da1c3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY) {
+ unsigned ShY, bool AddX) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(ShX, DL, VT), Mul359);
+ DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
switch (MulAmt) {
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1);
+ return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1);
+ return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2);
+ return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2);
+ return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3);
+ return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
default:
- return SDValue();
+ break;
}
+
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
+ int ShX;
+ if (int ShY = isShifted359(MulAmt - 1, ShX)) {
+ assert(ShX != 0 && "MulAmt=4,6,10 handled before");
+ if (ShX <= 3)
+ return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
+ }
+ return SDValue();
}
// Try to expand a scalar multiply to a faster sequence.
@@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}
- // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
- return V;
+ // Try both two-shXadd forms on the odd part of MulAmt; a nonzero 2^N
+ // factor costs one extra shl. Forms: 3/5/9 * 3/5/9 (covers multiples of
+ // 25, which happen to be quite common) and 2/4/8 * 3/5/9 + 1.
+ Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+ if (Shift == 0)
+ return V;
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
+ }
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this a sum of two power of 2s because that's
// easy. Then count how many zeros are up to the first bit.
- if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
- unsigned ScaleShift = llvm::countr_zero(MulAmt);
- if (ScaleShift >= 1 && ScaleShift < 4) {
- unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
- SDLoc DL(N);
- SDValue Shift1 =
- DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
- }
+ if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
+ SDLoc DL(N);
+ SDValue Shift1 =
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(Shift, DL, VT), Shift1);
}
- // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
- // This is the two instruction form, there are also three instruction
- // variants we could implement. e.g.
- // (2^(1,2,3) * 3,5,9 + 1) << C2
- // 2^(C1>3) * 3,5,9 +/- 1
- if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
- assert(Shift != 0 && "MulAmt=4,6,10 handled before");
- if (Shift <= 3) {
- SDLoc DL(N);
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(Shift, DL, VT), X);
- }
- }
+ // TODO: 2^(C1>3) * 3,5,9 +/- 1
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
}
}
-
- // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
- // of 25 which happen to be quite common.
- Shift = llvm::countr_zero(MulAmt);
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
- SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
- }
}
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 18c462ffd0ff..dd2cffd7bd16 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -77,17 +77,53 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10CHECK-NEXT: s_endpgm
;
-; GFX11CHECK-LABEL: sgpr_isnan_f16:
-; GFX11CHECK: ; %bb.0:
-; GFX11CHECK-NEXT: s_clause 0x1
-; GFX11CHECK-NEXT: s_load_b32 s2, s[4:5], 0x2c
-; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT: s_endpgm
+; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
+; GFX11SELDAG-TRUE16: ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT: s_clause 0x1
+; GFX11SELDAG-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
+; GFX11SELDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11SELDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11SELDAG-FAKE16-LABEL: sgpr_isnan_f16:
+; GFX11SELDAG-FAKE16: ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT: s_clause 0x1
+; GFX11SELDAG-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11SELDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3
+; GFX11SELDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
+; GFX11GLISEL-TRUE16: ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1
+; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
+; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
+; GFX11GLISEL-FAKE16: ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1
+; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3
+; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11GLISEL-FAKE16-NEXT: s_endpgm
%result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
@@ -212,8 +248,9 @@ define i1 @snan_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: snan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 1
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 1
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: snan_f16:
@@ -226,8 +263,9 @@ define i1 @snan_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: snan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 1
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 1
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: snan_f16:
@@ -285,8 +323,9 @@ define i1 @qnan_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: qnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 2
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 2
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: qnan_f16:
@@ -299,8 +338,9 @@ define i1 @qnan_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: qnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 2
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 2
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: qnan_f16:
@@ -358,8 +398,9 @@ define i1 @posinf_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: posinf_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x200
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x200
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: posinf_f16:
@@ -372,8 +413,9 @@ define i1 @posinf_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: posinf_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x200
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x200
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: posinf_f16:
@@ -429,8 +471,9 @@ define i1 @neginf_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: neginf_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 4
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 4
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: neginf_f16:
@@ -443,8 +486,9 @@ define i1 @neginf_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: neginf_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 4
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 4
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: neginf_f16:
@@ -514,8 +558,9 @@ define i1 @posnormal_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: posnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x100
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x100
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: posnormal_f16:
@@ -528,8 +573,9 @@ define i1 @posnormal_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: posnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x100
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x100
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: posnormal_f16:
@@ -597,8 +643,9 @@ define i1 @negnormal_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: negnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 8
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 8
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: negnormal_f16:
@@ -611,8 +658,9 @@ define i1 @negnormal_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: negnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 8
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 8
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: negnormal_f16:
@@ -673,8 +721,9 @@ define i1 @possubnormal_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: possubnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x80
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x80
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: possubnormal_f16:
@@ -687,8 +736,9 @@ define i1 @possubnormal_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: possubnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x80
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x80
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: possubnormal_f16:
@@ -755,8 +805,9 @@ define i1 @negsubnormal_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: negsubnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 16
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 16
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: negsubnormal_f16:
@@ -769,8 +820,9 @@ define i1 @negsubnormal_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: negsubnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 16
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 16
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: negsubnormal_f16:
@@ -824,8 +876,9 @@ define i1 @poszero_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: poszero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 64
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 64
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: poszero_f16:
@@ -838,8 +891,9 @@ define i1 @poszero_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: poszero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 64
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 64
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: poszero_f16:
@@ -895,8 +949,9 @@ define i1 @negzero_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: negzero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 32
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 32
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: negzero_f16:
@@ -909,8 +964,9 @@ define i1 @negzero_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: negzero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 32
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 32
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: negzero_f16:
@@ -968,8 +1024,9 @@ define i1 @posfinite_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: posfinite_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1c0
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1c0
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: posfinite_f16:
@@ -982,8 +1039,9 @@ define i1 @posfinite_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: posfinite_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1c0
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1c0
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: posfinite_f16:
@@ -1047,8 +1105,9 @@ define i1 @negfinite_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: negfinite_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 56
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 56
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: negfinite_f16:
@@ -1061,8 +1120,9 @@ define i1 @negfinite_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: negfinite_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 56
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 56
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: negfinite_f16:
@@ -1120,8 +1180,9 @@ define i1 @isnan_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: isnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 3
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isnan_f16:
@@ -1134,8 +1195,9 @@ define i1 @isnan_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnan_f16:
@@ -1195,8 +1257,9 @@ define i1 @not_isnan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_isnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3fc
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3fc
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_isnan_f16:
@@ -1209,8 +1272,9 @@ define i1 @not_isnan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_isnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3fc
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3fc
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_isnan_f16:
@@ -1336,11 +1400,13 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isnan_v2f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3
+; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l
; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnan_v2f16:
@@ -1499,13 +1565,17 @@ define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isnan_v3f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 3
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v4, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v2.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v4
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.l, v5.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, v3
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnan_v3f16:
@@ -1693,16 +1763,20 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isnan_v4f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.h, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.l, 3
+; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 3
+; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v4, 3 :: v_dual_mov_b32 v5, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v2.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.h, v3.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.l, v6.l
; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v4
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v1.h, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v1.h, v7.l
; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, v5
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnan_v4f16:
@@ -1771,8 +1845,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
; GFX11SELDAG-TRUE16-LABEL: isnan_f16_strictfp:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 3
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isnan_f16_strictfp:
@@ -1785,8 +1860,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
; GFX11GLISEL-TRUE16-LABEL: isnan_f16_strictfp:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnan_f16_strictfp:
@@ -1846,8 +1922,9 @@ define i1 @isinf_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: isinf_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isinf_f16:
@@ -1860,8 +1937,9 @@ define i1 @isinf_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isinf_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isinf_f16:
@@ -1921,8 +1999,9 @@ define i1 @isfinite_f16(half %x) nounwind {
; GFX11SELDAG-TRUE16-LABEL: isfinite_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isfinite_f16:
@@ -1935,8 +2014,9 @@ define i1 @isfinite_f16(half %x) nounwind {
; GFX11GLISEL-TRUE16-LABEL: isfinite_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isfinite_f16:
@@ -1994,8 +2074,9 @@ define i1 @issubnormal_or_zero_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: issubnormal_or_zero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0xf0
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0xf0
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: issubnormal_or_zero_f16:
@@ -2008,8 +2089,9 @@ define i1 @issubnormal_or_zero_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: issubnormal_or_zero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0xf0
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0xf0
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: issubnormal_or_zero_f16:
@@ -2074,8 +2156,9 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_or_zero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x30f
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x30f
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_or_zero_f16:
@@ -2088,8 +2171,9 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_or_zero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x30f
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x30f
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_or_zero_f16:
@@ -2153,8 +2237,9 @@ define i1 @isnormal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: isnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x108
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x108
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isnormal_f16:
@@ -2167,8 +2252,9 @@ define i1 @isnormal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: isnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x108
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x108
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnormal_f16:
@@ -2236,8 +2322,9 @@ define i1 @not_isnormal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_isnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2f7
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2f7
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_isnormal_f16:
@@ -2250,8 +2337,9 @@ define i1 @not_isnormal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_isnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2f7
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2f7
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_isnormal_f16:
@@ -2330,8 +2418,9 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_is_plus_normal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2ff
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2ff
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_is_plus_normal_f16:
@@ -2344,8 +2433,9 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_is_plus_normal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x2ff
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x2ff
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_is_plus_normal_f16:
@@ -2424,8 +2514,9 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_is_neg_normal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3f7
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3f7
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_is_neg_normal_f16:
@@ -2438,8 +2529,9 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_is_neg_normal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3f7
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3f7
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_is_neg_normal_f16:
@@ -2501,8 +2593,9 @@ define i1 @issubnormal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: issubnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x90
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x90
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: issubnormal_f16:
@@ -2515,8 +2608,9 @@ define i1 @issubnormal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: issubnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x90
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x90
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: issubnormal_f16:
@@ -2586,8 +2680,9 @@ define i1 @not_issubnormal_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x36f
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x36f
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_f16:
@@ -2600,8 +2695,9 @@ define i1 @not_issubnormal_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x36f
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x36f
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_f16:
@@ -2659,8 +2755,9 @@ define i1 @iszero_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: iszero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x60
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x60
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_f16:
@@ -2673,8 +2770,9 @@ define i1 @iszero_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: iszero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x60
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x60
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_f16:
@@ -2745,8 +2843,9 @@ define i1 @not_iszero_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39f
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39f
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_f16:
@@ -2759,8 +2858,9 @@ define i1 @not_iszero_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39f
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39f
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_f16:
@@ -2818,8 +2918,9 @@ define i1 @ispositive_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: ispositive_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c0
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c0
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: ispositive_f16:
@@ -2832,8 +2933,9 @@ define i1 @ispositive_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: ispositive_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c0
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c0
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: ispositive_f16:
@@ -2907,8 +3009,9 @@ define i1 @not_ispositive_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_ispositive_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 63
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 63
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_ispositive_f16:
@@ -2921,8 +3024,9 @@ define i1 @not_ispositive_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_ispositive_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 63
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 63
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_ispositive_f16:
@@ -2992,8 +3096,9 @@ define i1 @isnegative_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: isnegative_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 60
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 60
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isnegative_f16:
@@ -3006,8 +3111,9 @@ define i1 @isnegative_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: isnegative_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 60
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 60
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isnegative_f16:
@@ -3074,8 +3180,9 @@ define i1 @not_isnegative_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_isnegative_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c3
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c3
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_isnegative_f16:
@@ -3088,8 +3195,9 @@ define i1 @not_isnegative_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_isnegative_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x3c3
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x3c3
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_isnegative_f16:
@@ -3152,8 +3260,9 @@ define i1 @iszero_or_nan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f16:
@@ -3166,8 +3275,9 @@ define i1 @iszero_or_nan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f16:
@@ -3231,8 +3341,9 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 {
; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_daz:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_daz:
@@ -3245,8 +3356,9 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 {
; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_daz:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_daz:
@@ -3310,8 +3422,9 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_maybe_daz:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_maybe_daz:
@@ -3324,8 +3437,9 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_maybe_daz:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x63
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_maybe_daz:
@@ -3398,8 +3512,9 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f16:
@@ -3412,8 +3527,9 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f16:
@@ -3486,8 +3602,9 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_daz:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_daz:
@@ -3500,8 +3617,9 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_daz:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_daz:
@@ -3574,8 +3692,9 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz:
@@ -3588,8 +3707,9 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39c
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39c
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz:
@@ -3653,8 +3773,9 @@ define i1 @iszero_or_qnan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: iszero_or_qnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x62
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x62
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_or_qnan_f16:
@@ -3667,8 +3788,9 @@ define i1 @iszero_or_qnan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: iszero_or_qnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x62
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x62
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_or_qnan_f16:
@@ -3737,8 +3859,9 @@ define i1 @iszero_or_snan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: iszero_or_snan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x61
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x61
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: iszero_or_snan_f16:
@@ -3751,8 +3874,9 @@ define i1 @iszero_or_snan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: iszero_or_snan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x61
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x61
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: iszero_or_snan_f16:
@@ -3841,8 +3965,9 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_qnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39d
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39d
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_qnan_f16:
@@ -3855,8 +3980,9 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_qnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39d
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39d
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_qnan_f16:
@@ -3942,8 +4068,9 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_snan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39e
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39e
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_snan_f16:
@@ -3956,8 +4083,9 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_snan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x39e
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x39e
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_snan_f16:
@@ -4018,8 +4146,9 @@ define i1 @isinf_or_nan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: isinf_or_nan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x207
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x207
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isinf_or_nan_f16:
@@ -4032,8 +4161,9 @@ define i1 @isinf_or_nan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: isinf_or_nan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x207
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x207
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isinf_or_nan_f16:
@@ -4094,8 +4224,9 @@ define i1 @not_isinf_or_nan_f16(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_isinf_or_nan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_isinf_or_nan_f16:
@@ -4108,8 +4239,9 @@ define i1 @not_isinf_or_nan_f16(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_isinf_or_nan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1f8
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1f8
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_isinf_or_nan_f16:
@@ -4170,8 +4302,9 @@ define i1 @isfinite_or_nan_f(half %x) {
; GFX11SELDAG-TRUE16-LABEL: isfinite_or_nan_f:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1fb
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1fb
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: isfinite_or_nan_f:
@@ -4184,8 +4317,9 @@ define i1 @isfinite_or_nan_f(half %x) {
; GFX11GLISEL-TRUE16-LABEL: isfinite_or_nan_f:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x1fb
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x1fb
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: isfinite_or_nan_f:
@@ -4246,8 +4380,9 @@ define i1 @not_isfinite_or_nan_f(half %x) {
; GFX11SELDAG-TRUE16-LABEL: not_isfinite_or_nan_f:
; GFX11SELDAG-TRUE16: ; %bb.0: ; %entry
; GFX11SELDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204
-; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204
+; GFX11SELDAG-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11SELDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11SELDAG-FAKE16-LABEL: not_isfinite_or_nan_f:
@@ -4260,8 +4395,9 @@ define i1 @not_isfinite_or_nan_f(half %x) {
; GFX11GLISEL-TRUE16-LABEL: not_isfinite_or_nan_f:
; GFX11GLISEL-TRUE16: ; %bb.0: ; %entry
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e64 s0, v0.l, 0x204
-; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x204
+; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11GLISEL-FAKE16-LABEL: not_isfinite_or_nan_f:
diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
index 9484417e63c9..6706de13bb89 100644
--- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir
+++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
@@ -48,7 +48,9 @@ body: |
; CHECK-LABEL: name: sgpr_lo16
; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, 30, 0, 0, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 30
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_16 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_t16_e64_]]
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
@@ -232,3 +234,34 @@ body: |
$vgpr0 = COPY %3
S_ENDPGM 0, implicit $vgpr0
...
+
+# Make sure the immediate materialized by the v_mov_b16 isn't
+# incorrectly folded into the bfi as 0.
+
+# FIXME: %4:vgpr_32 = COPY %3 is a direct copy from v16 to v32 and
+# should probably fail the verifier
+---
+name: mov_v16_copy_v32_fold_b32_regression
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: mov_v16_copy_v32_fold_b32_regression
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B16_t16_e64_]]
+ ; CHECK-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 32767, [[COPY2]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_BFI_B32_e64_]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr_32 = COPY $vgpr1
+ %1:vgpr_32 = COPY $vgpr0
+ %3:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec
+ %4:vgpr_32 = COPY %3
+ %5:vgpr_32 = V_BFI_B32_e64 32767, %4, %1, implicit $exec
+ $vgpr0 = COPY %5
+ SI_RETURN implicit $vgpr0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll
new file mode 100644
index 000000000000..0bebb5849ed8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s
+
+; Make sure that the 16-bit constant 0x3c00 isn't folded as 0 into
+; v_bfi_b32.
+define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) {
+; CHECK-LABEL: mov16_bfi_fold_regression:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
+; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_pack_b32_f16 v0, v0.l, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %cmp = icmp eq i32 %arg1, 0
+ %call = call half @llvm.copysign.f16(half 0xH3C00, half %arg)
+ %select = select i1 %cmp, half 0xH3C00, half %call
+ %insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0
+ %bitcast = bitcast <2 x half> %insertelement to i32
+ ret i32 %bitcast
+}
+
+declare half @llvm.copysign.f16(half, half) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/ARM/ldexp-fp128.ll b/llvm/test/CodeGen/ARM/ldexp-fp128.ll
new file mode 100644
index 000000000000..93fcd39e824f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ldexp-fp128.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-unknown-linux < %s | FileCheck -check-prefix=LINUX %s
+
+define fp128 @testExpl(fp128 %val, i32 %a) {
+; LINUX-LABEL: testExpl:
+; LINUX: @ %bb.0:
+; LINUX-NEXT: push {r11, lr}
+; LINUX-NEXT: sub sp, sp, #8
+; LINUX-NEXT: ldr r12, [sp, #16]
+; LINUX-NEXT: str r12, [sp]
+; LINUX-NEXT: bl ldexpl
+; LINUX-NEXT: add sp, sp, #8
+; LINUX-NEXT: pop {r11, pc}
+ %call = tail call fp128 @ldexpl(fp128 %val, i32 %a)
+ ret fp128 %call
+}
+
+declare fp128 @ldexpl(fp128, i32) memory(none)
+
+define fp128 @test_ldexp_f128_i32(fp128 %val, i32 %a) {
+; LINUX-LABEL: test_ldexp_f128_i32:
+; LINUX: @ %bb.0:
+; LINUX-NEXT: push {r11, lr}
+; LINUX-NEXT: sub sp, sp, #8
+; LINUX-NEXT: ldr r12, [sp, #16]
+; LINUX-NEXT: str r12, [sp]
+; LINUX-NEXT: bl ldexpl
+; LINUX-NEXT: add sp, sp, #8
+; LINUX-NEXT: pop {r11, pc}
+ %call = tail call fp128 @llvm.ldexp.f128.i32(fp128 %val, i32 %a)
+ ret fp128 %call
+}
+
+define <2 x fp128> @test_ldexp_v2f128_v2i32(<2 x fp128> %val, <2 x i32> %a) {
+; LINUX-LABEL: test_ldexp_v2f128_v2i32:
+; LINUX: @ %bb.0:
+; LINUX-NEXT: push {r4, r5, r6, lr}
+; LINUX-NEXT: vpush {d8}
+; LINUX-NEXT: sub sp, sp, #8
+; LINUX-NEXT: mov r5, r3
+; LINUX-NEXT: add r3, sp, #40
+; LINUX-NEXT: mov r6, r2
+; LINUX-NEXT: mov r4, r0
+; LINUX-NEXT: ldm r3, {r0, r1, r2, r3}
+; LINUX-NEXT: vldr d8, [sp, #56]
+; LINUX-NEXT: vst1.32 {d8[1]}, [sp:32]
+; LINUX-NEXT: bl ldexpl
+; LINUX-NEXT: ldr r12, [sp, #32]
+; LINUX-NEXT: vst1.32 {d8[0]}, [sp:32]
+; LINUX-NEXT: ldr lr, [sp, #36]
+; LINUX-NEXT: str r0, [r4, #16]
+; LINUX-NEXT: mov r0, r6
+; LINUX-NEXT: str r1, [r4, #20]
+; LINUX-NEXT: mov r1, r5
+; LINUX-NEXT: str r2, [r4, #24]
+; LINUX-NEXT: mov r2, r12
+; LINUX-NEXT: str r3, [r4, #28]
+; LINUX-NEXT: mov r3, lr
+; LINUX-NEXT: bl ldexpl
+; LINUX-NEXT: stm r4, {r0, r1, r2, r3}
+; LINUX-NEXT: add sp, sp, #8
+; LINUX-NEXT: vpop {d8}
+; LINUX-NEXT: pop {r4, r5, r6, pc}
+ %call = tail call <2 x fp128> @llvm.ldexp.v2f128.v2i32(<2 x fp128> %val, <2 x i32> %a)
+ ret <2 x fp128> %call
+}
diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll
index c3656a6bdafb..9bafa10c47e3 100644
--- a/llvm/test/CodeGen/LoongArch/ldptr.ll
+++ b/llvm/test/CodeGen/LoongArch/ldptr.ll
@@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind {
; LA32-LABEL: ldptr_w:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_w:
@@ -81,10 +80,9 @@ entry:
define i64 @ldptr_d(ptr %p) nounwind {
; LA32-LABEL: ldptr_d:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a1, $a0, 1
-; LA32-NEXT: ld.w $a0, $a1, 0
-; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: addi.w $a1, $a0, 2047
+; LA32-NEXT: ld.w $a0, $a1, 1
+; LA32-NEXT: ld.w $a1, $a1, 5
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_d:
diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
index 9a806a12f7de..93f73e5cd30f 100644
--- a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
+++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
@@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB0_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: ld.w $a0, $s2, 4
-; LA32-NEXT: ld.w $a1, $s2, 0
+; LA32-NEXT: ld.w $a0, $s2, 12
+; LA32-NEXT: ld.w $a1, $s2, 8
; LA32-NEXT: add.w $a0, $a0, $s6
; LA32-NEXT: add.w $s3, $a1, $s3
; LA32-NEXT: sltu $a1, $s3, $a1
@@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s3, $zero
; LA32-NEXT: move $s6, $zero
; LA32-NEXT: .LBB0_4: # %for.cond.cleanup
-; LA32-NEXT: st.w $s3, $s2, 0
-; LA32-NEXT: st.w $s6, $s2, 4
+; LA32-NEXT: st.w $s3, $s2, 8
+; LA32-NEXT: st.w $s6, $s2, 12
; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
@@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB0_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -100,7 +98,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: ld.d $a0, $s1, 0
+; LA64-NEXT: ld.d $a0, $s1, 8
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: add.d $s2, $a0, $s2
; LA64-NEXT: bnez $s0, .LBB0_2
@@ -108,7 +106,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB0_3:
; LA64-NEXT: move $s2, $zero
; LA64-NEXT: .LBB0_4: # %for.cond.cleanup
-; LA64-NEXT: st.d $s2, $s1, 0
+; LA64-NEXT: st.d $s2, $s1, 8
; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB1_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: fld.s $fa0, $s2, 0
+; LA32-NEXT: fld.s $fa0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB1_3:
; LA32-NEXT: movgr2fr.w $fs0, $zero
; LA32-NEXT: .LBB1_4: # %for.cond.cleanup
-; LA32-NEXT: fst.s $fs0, $s2, 0
+; LA32-NEXT: fst.s $fs0, $s2, 16
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
@@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB1_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -220,7 +216,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: fld.s $fa0, $s1, 0
+; LA64-NEXT: fld.s $fa0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: fadd.s $fs0, $fa0, $fs0
; LA64-NEXT: bnez $s0, .LBB1_2
@@ -228,7 +224,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB1_3:
; LA64-NEXT: movgr2fr.w $fs0, $zero
; LA64-NEXT: .LBB1_4: # %for.cond.cleanup
-; LA64-NEXT: fst.s $fs0, $s1, 0
+; LA64-NEXT: fst.s $fs0, $s1, 16
; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s0, $a3
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a0, $a0, 6
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB2_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: vld $vr0, $s2, 0
+; LA32-NEXT: vld $vr0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB2_3:
; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: .LBB2_4: # %for.cond.cleanup
-; LA32-NEXT: vst $vr0, $s2, 0
+; LA32-NEXT: vst $vr0, $s2, 16
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: slli.d $a0, $a0, 6
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $a1, .LBB2_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: vld $vr0, $s1, 0
+; LA64-NEXT: vld $vr0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; LA64-NEXT: vadd.w $vr1, $vr0, $vr1
@@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB2_3:
; LA64-NEXT: vrepli.b $vr0, 0
; LA64-NEXT: .LBB2_4: # %for.cond.cleanup
-; LA64-NEXT: vst $vr0, $s1, 0
+; LA64-NEXT: vst $vr0, $s1, 16
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s0, $a3
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a0, $a0, 6
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 32
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB3_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: xvld $xr0, $s2, 0
+; LA32-NEXT: xvld $xr0, $s2, 32
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB3_3:
; LA32-NEXT: xvrepli.b $xr0, 0
; LA32-NEXT: .LBB3_4: # %for.cond.cleanup
-; LA32-NEXT: xvst $xr0, $s2, 0
+; LA32-NEXT: xvst $xr0, $s2, 32
; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
; LA64-NEXT: slli.d $a0, $a0, 6
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 32
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $a1, .LBB3_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: xvld $xr0, $s1, 0
+; LA64-NEXT: xvld $xr0, $s1, 32
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1
@@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB3_3:
; LA64-NEXT: xvrepli.b $xr0, 0
; LA64-NEXT: .LBB3_4: # %for.cond.cleanup
-; LA64-NEXT: xvst $xr0, $s1, 0
+; LA64-NEXT: xvst $xr0, $s1, 32
; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
@@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB4_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: vldrepl.b $vr0, $s2, 0
+; LA32-NEXT: vldrepl.b $vr0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB4_3:
; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: .LBB4_4: # %for.cond.cleanup
-; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1
+; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB4_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: vldrepl.b $vr0, $s1, 0
+; LA64-NEXT: vldrepl.b $vr0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; LA64-NEXT: vadd.b $vr1, $vr0, $vr1
@@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB4_3:
; LA64-NEXT: vrepli.b $vr0, 0
; LA64-NEXT: .LBB4_4: # %for.cond.cleanup
-; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1
+; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB5_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: xvldrepl.d $xr0, $s2, 0
+; LA32-NEXT: xvldrepl.d $xr0, $s2, 8
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB5_3:
; LA32-NEXT: xvrepli.b $xr0, 0
; LA32-NEXT: .LBB5_4: # %for.cond.cleanup
-; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1
+; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1
; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB5_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: xvldrepl.d $xr0, $s1, 0
+; LA64-NEXT: xvldrepl.d $xr0, $s1, 8
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1
@@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB5_3:
; LA64-NEXT: xvrepli.b $xr0, 0
; LA64-NEXT: .LBB5_4: # %for.cond.cleanup
-; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1
+; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1
; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll
index d70f9f4ba160..23b433aa1585 100644
--- a/llvm/test/CodeGen/LoongArch/stptr.ll
+++ b/llvm/test/CodeGen/LoongArch/stptr.ll
@@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind {
; LA32-LABEL: stptr_w:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_w:
@@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind {
; LA32-LABEL: stptr_d:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a2, $a0, 5
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_d:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
index 50bd22bf5fd6..f4964288e354 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBA-LABEL: addmul22:
+; RV64XTHEADBA: # %bb.0:
+; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2
+; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1
+; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7fd76262d547..d4b228828c04 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul14(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul14:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a2, a0, 1
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul14:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul14:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 14
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul18(i64 %a, i64 %b) {
; RV64I-LABEL: addmul18:
; RV64I: # %bb.0:
@@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul22:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul22:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
@@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul26(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul26:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 26
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul26:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul26:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 26
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul36(i64 %a, i64 %b) {
; RV64I-LABEL: addmul36:
; RV64I: # %bb.0:
@@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul38(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul38:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 38
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul38:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul38:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 38
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul42(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul42:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 42
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul42:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul42:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 42
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul72(i64 %a, i64 %b) {
; RV64I-LABEL: addmul72:
; RV64I: # %bb.0:
@@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul74(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul74:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 74
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul74:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul74:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 74
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul82(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul82:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 82
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul82:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul82:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 82
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul146(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul146:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 146
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul146:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul146:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 146
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @mul50(i64 %a) {
; RV64I-LABEL: mul50:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index d8e2b2c2bf58..305ab934e44a 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -263,3 +263,35 @@ define i64 @test_inv_and_eqz(i64 %f, i64 %x, i1 %cond) {
%7 = and i64 %6, %f
ret i64 %7
}
+
+define i32 @pr166596(i32 %conv.i, i1 %iszero) #0 {
+; RV32ZICOND-LABEL: pr166596:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: andi a1, a1, 1
+; RV32ZICOND-NEXT: xori a0, a0, 1
+; RV32ZICOND-NEXT: zext.h a0, a0
+; RV32ZICOND-NEXT: clz a0, a0
+; RV32ZICOND-NEXT: addi a0, a0, 41
+; RV32ZICOND-NEXT: czero.nez a0, a0, a1
+; RV32ZICOND-NEXT: addi a0, a0, -9
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: pr166596:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: andi a1, a1, 1
+; RV64ZICOND-NEXT: xori a0, a0, 1
+; RV64ZICOND-NEXT: zext.h a0, a0
+; RV64ZICOND-NEXT: clz a0, a0
+; RV64ZICOND-NEXT: addi a0, a0, 9
+; RV64ZICOND-NEXT: czero.nez a0, a0, a1
+; RV64ZICOND-NEXT: addi a0, a0, -9
+; RV64ZICOND-NEXT: ret
+entry:
+ %not.i = xor i32 %conv.i, 1
+ %conv2.i = trunc i32 %not.i to i16
+ %conv22 = zext i16 %conv2.i to i64
+ %0 = call i64 @llvm.ctlz.i64(i64 %conv22, i1 false)
+ %cast = trunc i64 %0 to i32
+ %clzg = select i1 %iszero, i32 -9, i32 %cast
+ ret i32 %clzg
+}
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index 78e4f86ec1b9..ff0dfb371bbb 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -674,46 +674,3 @@ v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
-// nv bit in FLAT instructions
-flat_load_ubyte v5, v[2:3] offset:4095 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_load_ubyte a5, v[2:3] offset:4095 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_store_dword v[2:3], v5 offset:4095 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_store_dword v[2:3], a5 offset:4095 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_load_ubyte v5, v[2:3], off offset:-1 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_store_byte v[2:3], v5, off offset:-1 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_add v[2:3], v5, off nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_swap a1, v[2:3], a2, off glc nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_swap_x2 v[2:3], v[4:5], off nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_swap_x2 v[2:3], a[4:5], off nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-scratch_load_ubyte v5, off, s2 offset:-1 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-scratch_load_ubyte a5, off, s2 offset:-1 nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-scratch_store_dword v2, v3, off nv
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
index 3af0d83fb305..c96a72ddc257 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
@@ -706,107 +706,107 @@ flat_load_short_d16_hi a5, v[2:3] offset:4095 glc
flat_load_short_d16_hi a5, v[2:3] offset:4095 slc
// GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_swap a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_add a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_sub a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_smin a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_umin a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_smax a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_umax a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_and a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_or a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_xor a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_inc a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_dec a0, v[2:3], a2 offset:4095 glc
// GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc
// GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
// GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s
index dc51bab65aa0..fd59a01b34a0 100644
--- a/llvm/test/MC/AMDGPU/gfx942_err.s
+++ b/llvm/test/MC/AMDGPU/gfx942_err.s
@@ -125,31 +125,3 @@ global_load_dword v[2:3], off lds
scratch_load_dword v2, off lds
// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-// nv bit in FLAT instructions
-flat_load_ubyte v5, v[2:3] offset:4095 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_store_dword v[2:3], v5 offset:4095 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-flat_atomic_add_f32 v[2:3], v5 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_load_dword v2, v[2:3], off sc0 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_store_dword v[2:3], v5 off sc0 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-global_atomic_swap v0, v[2:3], v5 off sc0 nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-scratch_load_lds_dword v2, off nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
-
-scratch_store_dword v2, v3, off nv
-// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
index 7687c0a478bd..5cc3d2533a14 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
@@ -24,18 +24,6 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc
flat_load_ubyte v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05]
-flat_load_ubyte v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_sbyte v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05]
@@ -60,18 +48,6 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc
flat_load_sbyte v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05]
-flat_load_sbyte v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_ushort v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05]
@@ -96,18 +72,6 @@ flat_load_ushort v5, v[1:2] offset:4095 glc
flat_load_ushort v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05]
-flat_load_ushort v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ushort v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ushort v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ushort v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_sshort v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05]
@@ -132,18 +96,6 @@ flat_load_sshort v5, v[1:2] offset:4095 glc
flat_load_sshort v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05]
-flat_load_sshort v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sshort v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sshort v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sshort v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_dword v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05]
@@ -168,18 +120,6 @@ flat_load_dword v5, v[1:2] offset:4095 glc
flat_load_dword v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05]
-flat_load_dword v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dword v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dword v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dword v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_dwordx2 v[5:6], v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05]
@@ -204,18 +144,6 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc
flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05]
-flat_load_dwordx2 v[5:6], v[1:2] nv
-// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_dwordx3 v[5:7], v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05]
@@ -240,18 +168,6 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc
flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05]
-flat_load_dwordx3 v[5:7], v[1:2] nv
-// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_dwordx4 v[5:8], v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05]
@@ -276,18 +192,6 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc
flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05]
-flat_load_dwordx4 v[5:8], v[1:2] nv
-// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05]
-
flat_store_byte v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00]
@@ -312,18 +216,6 @@ flat_store_byte v[1:2], v2 offset:4095 glc
flat_store_byte v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00]
-flat_store_byte v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_byte_d16_hi v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00]
@@ -348,18 +240,6 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc
flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00]
-flat_store_byte_d16_hi v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte_d16_hi v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_short v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00]
@@ -384,18 +264,6 @@ flat_store_short v[1:2], v2 offset:4095 glc
flat_store_short v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00]
-flat_store_short v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_short_d16_hi v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00]
@@ -420,18 +288,6 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc
flat_store_short_d16_hi v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00]
-flat_store_short_d16_hi v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short_d16_hi v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_dword v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00]
@@ -456,18 +312,6 @@ flat_store_dword v[1:2], v2 offset:4095 glc
flat_store_dword v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00]
-flat_store_dword v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dword v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dword v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dword v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_dwordx2 v[1:2], v[2:3] offset:4095
// CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00]
@@ -492,18 +336,6 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc
flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc
// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00]
-flat_store_dwordx2 v[1:2], v[2:3] nv
-// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv
-// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_dwordx3 v[1:2], v[2:4] offset:4095
// CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00]
@@ -528,18 +360,6 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc
flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc
// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00]
-flat_store_dwordx3 v[1:2], v[2:4] nv
-// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv
-// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00]
-
flat_store_dwordx4 v[1:2], v[2:5] offset:4095
// CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00]
@@ -564,18 +384,6 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc
flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc
// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00]
-flat_store_dwordx4 v[1:2], v[2:5] nv
-// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv
-// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00]
-
-flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00]
-
flat_load_ubyte_d16 v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05]
@@ -600,18 +408,6 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc
flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05]
-flat_load_ubyte_d16 v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16 v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_ubyte_d16_hi v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05]
@@ -636,18 +432,6 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc
flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05]
-flat_load_ubyte_d16_hi v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_sbyte_d16 v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05]
@@ -672,18 +456,6 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc
flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05]
-flat_load_sbyte_d16 v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16 v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_sbyte_d16_hi v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05]
@@ -708,18 +480,6 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc
flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05]
-flat_load_sbyte_d16_hi v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_short_d16 v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05]
@@ -744,18 +504,6 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc
flat_load_short_d16 v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05]
-flat_load_short_d16 v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16 v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16 v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16 v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05]
-
flat_load_short_d16_hi v5, v[1:2] offset:4095
// CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05]
@@ -780,18 +528,6 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc
flat_load_short_d16_hi v5, v[1:2] offset:4095 slc
// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05]
-flat_load_short_d16_hi v5, v[1:2] nv
-// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16_hi v5, v[1:2] offset:7 nv
-// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05]
-
-flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05]
-
flat_atomic_swap v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00]
@@ -816,18 +552,6 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc
flat_atomic_swap v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00]
-flat_atomic_swap v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_swap v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_swap v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00]
-
flat_atomic_cmpswap v[1:2], v[2:3] offset:4095
// CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00]
@@ -852,18 +576,6 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc
flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc
// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00]
-flat_atomic_cmpswap v[1:2], v[2:3] nv
-// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv
-// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00]
-
flat_atomic_add v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00]
@@ -888,18 +600,6 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc
flat_atomic_add v[1:2], v2 offset:4095 slc
// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00]
-flat_atomic_add v[1:2], v2 nv
-// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_add v[1:2], v2 offset:7 nv
-// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv
-// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00]
-
-flat_atomic_add v[1:2], v2 offset:4095 slc nv
-// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00]
-
flat_atomic_sub v[1:2], v2 offset:4095
// CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00]
@@ -1497,18 +1197,6 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc
global_load_ubyte v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05]
-global_load_ubyte v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05]
-
global_load_sbyte v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05]
@@ -1554,18 +1242,6 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc
global_load_sbyte v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05]
-global_load_sbyte v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05]
-
global_load_ushort v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05]
@@ -1611,18 +1287,6 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc
global_load_ushort v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05]
-global_load_ushort v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ushort v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ushort v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ushort v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05]
-
global_load_sshort v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05]
@@ -1668,18 +1332,6 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc
global_load_sshort v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05]
-global_load_sshort v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sshort v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sshort v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sshort v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05]
-
global_load_dword v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05]
@@ -1725,18 +1377,6 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc
global_load_dword v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05]
-global_load_dword v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dword v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dword v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dword v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05]
-
global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05]
@@ -1782,18 +1422,6 @@ global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc
global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05]
-global_load_dwordx2 v[5:6], v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05]
-
global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05]
@@ -1839,15 +1467,6 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc
global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05]
-global_load_dwordx3 v[5:7], v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05]
-
global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05]
@@ -1893,15 +1512,6 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc
global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05]
-global_load_dwordx4 v[5:8], v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05]
-global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05]
-
global_store_byte v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00]
@@ -1947,18 +1557,6 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc
global_store_byte v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00]
-global_store_byte v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00]
-
global_store_byte_d16_hi v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00]
@@ -2004,18 +1602,6 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc
global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00]
-global_store_byte_d16_hi v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00]
-
global_store_short v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00]
@@ -2061,18 +1647,6 @@ global_store_short v1, v2, s[6:7] offset:-1 glc
global_store_short v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00]
-global_store_short v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00]
-
global_store_short_d16_hi v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00]
@@ -2118,18 +1692,6 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc
global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00]
-global_store_short_d16_hi v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00]
-
global_store_dword v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00]
@@ -2175,18 +1737,6 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc
global_store_dword v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00]
-global_store_dword v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dword v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dword v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dword v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00]
-
global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00]
@@ -2232,18 +1782,6 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc
global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00]
-global_store_dwordx2 v1, v[2:3], s[6:7] nv
-// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00]
-
global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00]
@@ -2289,18 +1827,6 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc
global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00]
-global_store_dwordx3 v1, v[2:4], s[6:7] nv
-// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00]
-
global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00]
@@ -2346,18 +1872,6 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc
global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00]
-global_store_dwordx4 v1, v[2:5], s[6:7] nv
-// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00]
-
-global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00]
-
global_load_ubyte_d16 v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05]
@@ -2403,18 +1917,6 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc
global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05]
-global_load_ubyte_d16 v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05]
-
global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05]
@@ -2460,18 +1962,6 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc
global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05]
-global_load_ubyte_d16_hi v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05]
-
global_load_sbyte_d16 v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05]
@@ -2517,18 +2007,6 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc
global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05]
-global_load_sbyte_d16 v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05]
-
global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05]
@@ -2574,18 +2052,6 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc
global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05]
-global_load_sbyte_d16_hi v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05]
-
global_load_short_d16 v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05]
@@ -2631,18 +2097,6 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc
global_load_short_d16 v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05]
-global_load_short_d16 v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16 v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05]
-
global_load_short_d16_hi v5, v1, s[4:5] offset:-1
// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05]
@@ -2688,18 +2142,6 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc
global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc
// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05]
-global_load_short_d16_hi v5, v1, s[4:5] nv
-// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv
-// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05]
-
-global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05]
-
global_atomic_swap v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00]
@@ -2745,18 +2187,6 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc
global_atomic_swap v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00]
-global_atomic_swap v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_swap v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00]
-
global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00]
@@ -2802,18 +2232,6 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc
global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00]
-global_atomic_cmpswap v1, v[2:3], s[6:7] nv
-// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00]
-
global_atomic_add v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00]
@@ -2859,18 +2277,6 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc
global_atomic_add v1, v2, s[6:7] offset:-1 slc
// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00]
-global_atomic_add v1, v2, s[6:7] nv
-// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_add v1, v2, s[6:7] offset:-1 nv
-// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv
-// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00]
-
-global_atomic_add v1, v2, s[6:7] offset:-1 slc nv
-// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00]
-
global_atomic_sub v1, v2, s[6:7] offset:-1
// CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00]
@@ -3951,18 +3357,6 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc
scratch_load_ubyte v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_ubyte v5, off, s2 nv
-// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_sbyte v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05]
@@ -4008,18 +3402,6 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc
scratch_load_sbyte v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_sbyte v5, off, s2 nv
-// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_ushort v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05]
@@ -4065,18 +3447,6 @@ scratch_load_ushort v5, off, s2 offset:-1 glc
scratch_load_ushort v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_ushort v5, off, s2 nv
-// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ushort v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ushort v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ushort v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_sshort v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05]
@@ -4122,18 +3492,6 @@ scratch_load_sshort v5, off, s2 offset:-1 glc
scratch_load_sshort v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_sshort v5, off, s2 nv
-// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sshort v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sshort v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sshort v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_dword v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05]
@@ -4179,18 +3537,6 @@ scratch_load_dword v5, off, s2 offset:-1 glc
scratch_load_dword v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_dword v5, off, s2 nv
-// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dword v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dword v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dword v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_dwordx2 v[5:6], off, s2 offset:-1
// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05]
@@ -4236,18 +3582,6 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc
scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_dwordx2 v[5:6], off, s2 nv
-// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_dwordx3 v[5:7], off, s2 offset:-1
// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05]
@@ -4293,18 +3627,6 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc
scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_dwordx3 v[5:7], off, s2 nv
-// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_dwordx4 v[5:8], off, s2 offset:-1
// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05]
@@ -4350,18 +3672,6 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc
scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_dwordx4 v[5:8], off, s2 nv
-// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05]
-
scratch_store_byte off, v2, s3 offset:-1
// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00]
@@ -4407,18 +3717,6 @@ scratch_store_byte off, v2, s3 offset:-1 glc
scratch_store_byte off, v2, s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_byte off, v2, s3 nv
-// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte off, v2, s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte off, v2, s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte off, v2, s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_byte_d16_hi off, v2, s3 offset:-1
// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00]
@@ -4464,18 +3762,6 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc
scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_byte_d16_hi off, v2, s3 nv
-// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_short off, v2, s3 offset:-1
// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00]
@@ -4521,18 +3807,6 @@ scratch_store_short off, v2, s3 offset:-1 glc
scratch_store_short off, v2, s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_short off, v2, s3 nv
-// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short off, v2, s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short off, v2, s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short off, v2, s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_short_d16_hi off, v2, s3 offset:-1
// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00]
@@ -4578,18 +3852,6 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc
scratch_store_short_d16_hi off, v2, s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_short_d16_hi off, v2, s3 nv
-// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short_d16_hi off, v2, s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_dword off, v2, s3 offset:-1
// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00]
@@ -4635,18 +3897,6 @@ scratch_store_dword off, v2, s3 offset:-1 glc
scratch_store_dword off, v2, s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_dword off, v2, s3 nv
-// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dword off, v2, s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dword off, v2, s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dword off, v2, s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_dwordx2 off, v[2:3], s3 offset:-1
// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00]
@@ -4692,18 +3942,6 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc
scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_dwordx2 off, v[2:3], s3 nv
-// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_dwordx3 off, v[2:4], s3 offset:-1
// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00]
@@ -4749,18 +3987,6 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc
scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_dwordx3 off, v[2:4], s3 nv
-// CHECK: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00]
-
scratch_store_dwordx4 off, v[2:5], s3 offset:-1
// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00]
@@ -4806,18 +4032,6 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc
scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc
// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00]
-scratch_store_dwordx4 off, v[2:5], s3 nv
-// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv
-// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00]
-
-scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00]
-
scratch_load_ubyte_d16 v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05]
@@ -4863,18 +4077,6 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc
scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_ubyte_d16 v5, off, s2 nv
-// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_ubyte_d16_hi v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05]
@@ -4920,18 +4122,6 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc
scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_ubyte_d16_hi v5, off, s2 nv
-// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_sbyte_d16 v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05]
@@ -4977,18 +4167,6 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc
scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_sbyte_d16 v5, off, s2 nv
-// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_sbyte_d16_hi v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05]
@@ -5034,18 +4212,6 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc
scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_sbyte_d16_hi v5, off, s2 nv
-// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_short_d16 v5, off, s2 offset:-1
// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05]
@@ -5088,18 +4254,6 @@ scratch_load_short_d16 v5, off, s2 offset:-4096
scratch_load_short_d16 v5, off, s2 offset:-1 glc
// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_short_d16 v5, off, s2 nv
-// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16 v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16 v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16 v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05]
-
scratch_load_short_d16 v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05]
@@ -5148,18 +4302,6 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc
scratch_load_short_d16_hi v5, off, s2 offset:-1 slc
// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05]
-scratch_load_short_d16_hi v5, off, s2 nv
-// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16_hi v5, off, s2 offset:-1 nv
-// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv
-// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05]
-
-scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv
-// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05]
-
global_load_dword v[2:3], off lds
// CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
index 4c06585a4c2e..0ee659e207c9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt
@@ -21,18 +21,6 @@
# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_ubyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05
@@ -54,18 +42,6 @@
# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_sbyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05
@@ -87,18 +63,6 @@
# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_ushort v5, v[1:2] nv ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05
@@ -120,18 +84,6 @@
# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_sshort v5, v[1:2] nv ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_dword v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05
@@ -153,18 +105,6 @@
# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_dword v5, v[1:2] nv ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dword v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05
@@ -186,18 +126,6 @@
# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05
@@ -219,18 +147,6 @@
# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05
@@ -252,18 +168,6 @@
# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_store_byte v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00
@@ -285,18 +189,6 @@
# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_byte v[1:2], v2 nv ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00
@@ -318,18 +210,6 @@
# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_byte_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00
@@ -351,18 +231,6 @@
# CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_short v[1:2], v2 nv ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00
@@ -384,18 +252,6 @@
# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_short_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00
@@ -417,18 +273,6 @@
# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_dword v[1:2], v2 nv ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00
@@ -450,18 +294,6 @@
# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00
@@ -483,18 +315,6 @@
# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00
@@ -516,18 +336,6 @@
# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00]
0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00
-# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
-0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00]
-0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00
-
-# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00
-
# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05
@@ -549,18 +357,6 @@
# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05
@@ -582,18 +378,6 @@
# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05
@@ -615,18 +399,6 @@
# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05
@@ -648,18 +420,6 @@
# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05
@@ -681,18 +441,6 @@
# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_short_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05
@@ -714,18 +462,6 @@
# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05]
0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05
-# CHECK: flat_load_short_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
-0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05]
-0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05
-
-# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05]
-0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05
-
# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00
@@ -747,18 +483,6 @@
# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00
-# CHECK: flat_atomic_swap v[1:2], v2 nv ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
-0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00]
-0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00
-
# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00
@@ -780,18 +504,6 @@
# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00
-# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
-0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00]
-0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00
-
# CHECK: flat_atomic_add v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00
@@ -813,18 +525,6 @@
# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00
-# CHECK: flat_atomic_add v[1:2], v2 nv ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
-0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00]
-0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00
-
-# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00]
-0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00
-
# CHECK: flat_atomic_sub v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00]
0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00
@@ -1317,18 +1017,6 @@
# CHECK: global_load_ubyte v5, v[1:2], off ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_ubyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05
@@ -1338,18 +1026,6 @@
# CHECK: global_load_sbyte v5, v[1:2], off ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_sbyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05
@@ -1359,18 +1035,6 @@
# CHECK: global_load_ushort v5, v[1:2], off ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_ushort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05
@@ -1380,18 +1044,6 @@
# CHECK: global_load_sshort v5, v[1:2], off ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_sshort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05
@@ -1401,18 +1053,6 @@
# CHECK: global_load_dword v5, v[1:2], off ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_dword v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05
@@ -1422,18 +1062,6 @@
# CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05
@@ -1443,18 +1071,6 @@
# CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05
@@ -1464,18 +1080,6 @@
# CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00
@@ -1485,18 +1089,6 @@
# CHECK: global_store_byte v[1:2], v2, off ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_byte v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00
@@ -1506,18 +1098,6 @@
# CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00
@@ -1527,18 +1107,6 @@
# CHECK: global_store_short v[1:2], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_short v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00
@@ -1548,18 +1116,6 @@
# CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00
@@ -1569,18 +1125,6 @@
# CHECK: global_store_dword v[1:2], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_dword v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00
@@ -1590,18 +1134,6 @@
# CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00
@@ -1611,18 +1143,6 @@
# CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00
@@ -1632,18 +1152,6 @@
# CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00]
0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00
-# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00]
-0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00
-
-# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00
-
# CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05
@@ -1653,18 +1161,6 @@
# CHECK: global_load_ubyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05
@@ -1674,18 +1170,6 @@
# CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05
@@ -1695,18 +1179,6 @@
# CHECK: global_load_sbyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05
@@ -1716,18 +1188,6 @@
# CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05
@@ -1737,18 +1197,6 @@
# CHECK: global_load_short_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05]
0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05
@@ -1758,18 +1206,6 @@
# CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05]
0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05
-# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05]
-0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05
-
-# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05]
-0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05
-
# CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00
@@ -1779,18 +1215,6 @@
# CHECK: global_atomic_swap v[1:2], v2, off ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00]
0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00
-# CHECK: global_atomic_swap v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00]
-0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00
-
# CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00
@@ -1812,18 +1236,6 @@
# CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01]
0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01
-# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00]
-0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00
-
# CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00
@@ -1833,18 +1245,6 @@
# CHECK: global_atomic_add v[1:2], v2, off ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00]
0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00
-# CHECK: global_atomic_add v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00]
-0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00
-
-# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00]
-0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00
-
# CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00]
0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00
@@ -2103,18 +1503,6 @@
# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_ubyte v5, off, s2 nv ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05
@@ -2154,18 +1542,6 @@
# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_sbyte v5, off, s2 nv ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05
@@ -2205,18 +1581,6 @@
# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_ushort v5, off, s2 nv ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05
@@ -2256,18 +1620,6 @@
# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_sshort v5, off, s2 nv ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05
@@ -2307,18 +1659,6 @@
# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_dword v5, off, s2 nv ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05
@@ -2358,18 +1698,6 @@
# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05
@@ -2409,18 +1737,6 @@
# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05
@@ -2460,18 +1776,6 @@
# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00
@@ -2511,18 +1815,6 @@
# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_byte off, v2, s3 nv ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00
@@ -2562,18 +1854,6 @@
# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00
@@ -2613,18 +1893,6 @@
# CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_short off, v2, s3 nv ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00
@@ -2664,18 +1932,6 @@
# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00
@@ -2715,18 +1971,6 @@
# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_dword off, v2, s3 nv ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00
@@ -2766,18 +2010,6 @@
# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00
@@ -2817,18 +2049,6 @@
# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00
@@ -2868,18 +2088,6 @@
# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00]
0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00
-# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
-0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00
-
-# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00]
-0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00
-
# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05
@@ -2919,18 +2127,6 @@
# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05
@@ -2970,18 +2166,6 @@
# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05
@@ -3021,18 +2205,6 @@
# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05
@@ -3072,18 +2244,6 @@
# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05
@@ -3123,18 +2283,6 @@
# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_short_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05
@@ -3174,18 +2322,6 @@
# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05]
0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05
-# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
-0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05
-
-# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05]
-0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05
-
# CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00]
0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll
new file mode 100644
index 000000000000..fe7f43f7f4b0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll
@@ -0,0 +1,187 @@
+; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @wombat(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i8 %arg6) #0 {
+; CHECK-LABEL: define void @wombat(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], ptr [[ARG3:%.*]], ptr [[ARG4:%.*]], ptr [[ARG5:%.*]], i8 [[ARG6:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[ARG]], 0
+; CHECK-NEXT: br i1 [[ICMP]], label %[[BB7:.*]], label %[[BB25:.*]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[ARG]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[ZEXT]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[ZEXT]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[ARG5]], i64 [[ZEXT]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[ZEXT]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[ZEXT]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND05:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND16:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT7:%.*]] = and i1 [[BOUND05]], [[BOUND16]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT7]]
+; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]]
+; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]]
+; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
+; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]]
+; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]]
+; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX15]], [[FOUND_CONFLICT18]]
+; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]]
+; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]]
+; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]]
+; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[ARG6]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP4]], align 1, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp uge <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META3:![0-9]+]], !noalias [[META5:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD28:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META9:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP8]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META10:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD29]], [[WIDE_MASKED_LOAD28]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 16 x i8> [[TMP9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP10]], ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META3]], !noalias [[META5]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META11:![0-9]+]], !noalias [[META12:![0-9]+]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD28]], [[WIDE_MASKED_LOAD28]]
+; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD30]], [[TMP12]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP13]], ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META11]], !noalias [[META12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[BB24:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[BB7]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[BB8:.*]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD22:%.*]], %[[BB21:.*]] ]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GETELEMENTPTR]], align 1
+; CHECK-NEXT: [[ICMP9:%.*]] = icmp ult i8 [[LOAD]], [[ARG6]]
+; CHECK-NEXT: br i1 [[ICMP9]], label %[[BB21]], label %[[BB10:.*]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD12:%.*]] = load i8, ptr [[GETELEMENTPTR11]], align 1
+; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG3]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD14:%.*]] = load i8, ptr [[GETELEMENTPTR13]], align 1
+; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG4]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD16:%.*]] = load i8, ptr [[GETELEMENTPTR15]], align 1
+; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[LOAD16]], [[LOAD14]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[LOAD12]]
+; CHECK-NEXT: store i8 [[ADD]], ptr [[GETELEMENTPTR11]], align 1
+; CHECK-NEXT: [[GETELEMENTPTR17:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG2]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD18:%.*]] = load i8, ptr [[GETELEMENTPTR17]], align 1
+; CHECK-NEXT: [[MUL19:%.*]] = mul i8 [[LOAD14]], [[LOAD14]]
+; CHECK-NEXT: [[ADD20:%.*]] = add i8 [[LOAD18]], [[MUL19]]
+; CHECK-NEXT: store i8 [[ADD20]], ptr [[GETELEMENTPTR17]], align 1
+; CHECK-NEXT: br label %[[BB21]]
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[ADD22]] = add nuw nsw i64 [[PHI]], 1
+; CHECK-NEXT: [[ICMP23:%.*]] = icmp eq i64 [[ADD22]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[ICMP23]], label %[[BB24]], label %[[BB8]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: br label %[[BB25]]
+; CHECK: [[BB25]]:
+; CHECK-NEXT: ret void
+;
+bb:
+ %icmp = icmp sgt i32 %arg, 0
+ br i1 %icmp, label %bb7, label %bb25
+
+bb7: ; preds = %bb
+ %zext = zext nneg i32 %arg to i64
+ br label %bb8
+
+bb8: ; preds = %bb21, %bb7
+ %phi = phi i64 [ 0, %bb7 ], [ %add22, %bb21 ]
+ %getelementptr = getelementptr inbounds nuw i8, ptr %arg5, i64 %phi
+ %load = load i8, ptr %getelementptr, align 1
+ %icmp9 = icmp ult i8 %load, %arg6
+ br i1 %icmp9, label %bb21, label %bb10
+
+bb10: ; preds = %bb8
+ %getelementptr11 = getelementptr inbounds nuw i8, ptr %arg1, i64 %phi
+ %load12 = load i8, ptr %getelementptr11, align 1
+ %getelementptr13 = getelementptr inbounds nuw i8, ptr %arg3, i64 %phi
+ %load14 = load i8, ptr %getelementptr13, align 1
+ %getelementptr15 = getelementptr inbounds nuw i8, ptr %arg4, i64 %phi
+ %load16 = load i8, ptr %getelementptr15, align 1
+ %mul = mul i8 %load16, %load14
+ %add = add i8 %mul, %load12
+ store i8 %add, ptr %getelementptr11, align 1
+ %getelementptr17 = getelementptr inbounds nuw i8, ptr %arg2, i64 %phi
+ %load18 = load i8, ptr %getelementptr17, align 1
+ %mul19 = mul i8 %load14, %load14
+ %add20 = add i8 %load18, %mul19
+ store i8 %add20, ptr %getelementptr17, align 1
+ br label %bb21
+
+bb21: ; preds = %bb10, %bb8
+ %add22 = add nuw nsw i64 %phi, 1
+ %icmp23 = icmp eq i64 %add22, %zext
+ br i1 %icmp23, label %bb24, label %bb8, !llvm.loop !0
+
+bb24: ; preds = %bb21
+ br label %bb25
+
+bb25: ; preds = %bb24, %bb
+ ret void
+}
+
+attributes #0 = { uwtable vscale_range(1,16) "aarch64_pstate_sm_body" "target-features"="+fp-armv8,+neon,+sme,+v8a,-fmv" }
+
+!0 = distinct !{!0, !1, !2, !3, !4}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.vectorize.width", i32 16}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]]}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]], [[META1]], [[META7:![0-9]+]], [[META8:![0-9]+]]}
+; CHECK: [[META6]] = distinct !{[[META6]], [[META2]]}
+; CHECK: [[META7]] = distinct !{[[META7]], [[META2]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META2]]}
+; CHECK: [[META9]] = !{[[META7]]}
+; CHECK: [[META10]] = !{[[META8]]}
+; CHECK: [[META11]] = !{[[META6]]}
+; CHECK: [[META12]] = !{[[META1]], [[META7]], [[META8]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META14:![0-9]+]], [[META15:![0-9]+]], [[META16:![0-9]+]]}
+; CHECK: [[META14]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META15]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META16]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META15]]}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index cd9512f6eae8..b1f20a73c3b2 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1,8 +1,3 @@
-Analysis/LoopAccessAnalysis/memcheck-ni.ll
-Analysis/MemorySSA/pr116227.ll
-Analysis/MemorySSA/pr43641.ll
-Analysis/MemorySSA/pr46574.ll
-Analysis/MemorySSA/update-remove-dead-blocks.ll
Bitcode/fcmp-fast.ll
Bitcode/flags.ll
CodeGen/AArch64/cgdata-merge-local.ll
@@ -26,27 +21,12 @@ CodeGen/X86/nocfivalue.ll
DebugInfo/AArch64/ir-outliner.ll
DebugInfo/assignment-tracking/X86/hotcoldsplit.ll
DebugInfo/Generic/block-asan.ll
-DebugInfo/KeyInstructions/Generic/loop-unswitch.ll
DebugInfo/X86/asan_debug_info.ll
LTO/X86/diagnostic-handler-remarks-with-hotness.ll
Other/optimization-remarks-auto.ll
Other/X86/debugcounter-partiallyinlinelibcalls.ll
-Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
-Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
-Transforms/AtomicExpand/AArch64/pcsections.ll
Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
-Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
-Transforms/AtomicExpand/ARM/atomicrmw-fp.ll
-Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll
-Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll
-Transforms/AtomicExpand/Mips/atomicrmw-fp.ll
-Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
-Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
-Transforms/AtomicExpand/SPARC/libcalls.ll
Transforms/AtomicExpand/SPARC/partword.ll
-Transforms/AtomicExpand/X86/expand-atomic-rmw-fp.ll
-Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
-Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
Transforms/Attributor/align.ll
Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll
@@ -265,14 +245,13 @@ Transforms/InstCombine/and2.ll
Transforms/InstCombine/and-fcmp.ll
Transforms/InstCombine/and.ll
Transforms/InstCombine/and-or-icmps.ll
-Transforms/InstCombine/and-or-implied-cond-not.ll
Transforms/InstCombine/apint-div1.ll
Transforms/InstCombine/apint-div2.ll
Transforms/InstCombine/ashr-demand.ll
Transforms/InstCombine/atomic.ll
Transforms/InstCombine/binop-cast.ll
-Transforms/InstCombine/binop-select.ll
Transforms/InstCombine/binop-select-cast-of-select-cond.ll
+Transforms/InstCombine/binop-select.ll
Transforms/InstCombine/bit-checks.ll
Transforms/InstCombine/bitreverse.ll
Transforms/InstCombine/branch.ll
@@ -298,7 +277,6 @@ Transforms/InstCombine/fold-ctpop-of-not.ll
Transforms/InstCombine/fold-ext-eq-c-with-op.ll
Transforms/InstCombine/free-inversion.ll
Transforms/InstCombine/icmp-and-lowbit-mask.ll
-Transforms/InstCombine/icmp-equality-test.ll
Transforms/InstCombine/icmp.ll
Transforms/InstCombine/icmp-mul-and.ll
Transforms/InstCombine/icmp-of-and-x.ll
@@ -307,7 +285,6 @@ Transforms/InstCombine/icmp-select-implies-common-op.ll
Transforms/InstCombine/icmp-select.ll
Transforms/InstCombine/icmp-with-selects.ll
Transforms/InstCombine/intrinsic-select.ll
-Transforms/InstCombine/known-never-nan.ll
Transforms/InstCombine/ldexp-ext.ll
Transforms/InstCombine/ldexp.ll
Transforms/InstCombine/load-bitcast-select.ll
@@ -347,13 +324,11 @@ Transforms/InstCombine/or.ll
Transforms/InstCombine/pow-1.ll
Transforms/InstCombine/pow-3.ll
Transforms/InstCombine/pow-sqrt.ll
-Transforms/InstCombine/pr24354.ll
Transforms/InstCombine/pull-conditional-binop-through-shift.ll
Transforms/InstCombine/rem.ll
Transforms/InstCombine/sdiv-canonicalize.ll
Transforms/InstCombine/sdiv-guard.ll
Transforms/InstCombine/select-and-or.ll
-Transforms/InstCombine/select-bitext.ll
Transforms/InstCombine/select-cmp-br.ll
Transforms/InstCombine/select-cmp.ll
Transforms/InstCombine/select-factorize.ll
@@ -362,7 +337,6 @@ Transforms/InstCombine/select.ll
Transforms/InstCombine/select-min-max.ll
Transforms/InstCombine/select-of-symmetric-selects.ll
Transforms/InstCombine/select-select.ll
-Transforms/InstCombine/select-with-extreme-eq-cond.ll
Transforms/InstCombine/shift.ll
Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll
Transforms/InstCombine/shuffle-select-narrow.ll
@@ -512,66 +486,12 @@ Transforms/LoopBoundSplit/bug51866.ll
Transforms/LoopBoundSplit/bug-loop-bound-split-phi-in-exit-block.ll
Transforms/LoopBoundSplit/loop-bound-split.ll
Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
-Transforms/LoopDistribute/basic-with-memchecks.ll
-Transforms/LoopDistribute/bounds-expansion-bug.ll
-Transforms/LoopDistribute/cross-partition-access.ll
-Transforms/LoopDistribute/debug-loc.ll
-Transforms/LoopDistribute/debug-print.ll
-Transforms/LoopDistribute/diagnostics.ll
-Transforms/LoopDistribute/followup.ll
-Transforms/LoopDistribute/laa-invalidation.ll
-Transforms/LoopDistribute/outside-use.ll
-Transforms/LoopDistribute/pointer-phi-in-loop.ll
-Transforms/LoopDistribute/scev-inserted-runtime-check.ll
-Transforms/LoopDistribute/symbolic-stride.ll
-Transforms/LoopFlatten/loop-flatten-version.ll
Transforms/LoopIdiom/AArch64/byte-compare-index.ll
Transforms/LoopIdiom/AArch64/find-first-byte.ll
Transforms/LoopIdiom/RISCV/byte-compare-index.ll
-Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll
-Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
-Transforms/LoopIdiom/X86/left-shift-until-zero.ll
-Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll
-Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll
-Transforms/LoopLoadElim/forward.ll
-Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
-Transforms/LoopLoadElim/memcheck.ll
-Transforms/LoopLoadElim/pr47457.ll
-Transforms/LoopLoadElim/symbolic-stride.ll
-Transforms/LoopLoadElim/unknown-stride-known-dep.ll
-Transforms/LoopLoadElim/versioning-scev-invalidation.ll
-Transforms/LoopPredication/preserve-bpi.ll
-Transforms/LoopSimplifyCFG/constant-fold-branch.ll
-Transforms/LoopSimplifyCFG/handle_dead_exits.ll
-Transforms/LoopSimplifyCFG/invalidate-scev-dispositions-2.ll
-Transforms/LoopSimplifyCFG/invalidate-scev-dispositions.ll
-Transforms/LoopSimplifyCFG/lcssa.ll
-Transforms/LoopSimplifyCFG/live_block_marking.ll
-Transforms/LoopSimplifyCFG/mssa_update.ll
-Transforms/LoopSimplifyCFG/pr117537.ll
-Transforms/LoopSimplifyCFG/update_parents.ll
Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
Transforms/LoopUnroll/peel-last-iteration-with-guards.ll
Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
-Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
-Transforms/LoopVersioning/add-phi-update-users.ll
-Transforms/LoopVersioning/basic.ll
-Transforms/LoopVersioning/bound-check-partially-known.ll
-Transforms/LoopVersioning/crash-36998.ll
-Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
-Transforms/LoopVersioning/incorrect-phi.ll
-Transforms/LoopVersioning/invalidate-laa-after-versioning.ll
-Transforms/LoopVersioning/lcssa.ll
-Transforms/LoopVersioningLICM/load-from-unknown-address.ll
-Transforms/LoopVersioningLICM/loopversioningLICM1.ll
-Transforms/LoopVersioningLICM/loopversioningLICM2.ll
-Transforms/LoopVersioningLICM/metadata.ll
-Transforms/LoopVersioning/loop-invariant-bound.ll
-Transforms/LoopVersioning/noalias.ll
-Transforms/LoopVersioning/noalias-version-twice.ll
-Transforms/LoopVersioning/single-iteration.ll
-Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll
-Transforms/LoopVersioning/wrapping-pointer-versioning.ll
Transforms/LowerAtomic/atomic-load.ll
Transforms/LowerAtomic/atomic-swap.ll
Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll
@@ -740,27 +660,6 @@ Transforms/Scalarizer/scatter-order.ll
Transforms/Scalarizer/variable-extractelement.ll
Transforms/Scalarizer/variable-insertelement.ll
Transforms/Scalarizer/vector-of-pointer-to-vector.ll
-Transforms/SimpleLoopUnswitch/debuginfo.ll
-Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll
-Transforms/SimpleLoopUnswitch/endless-unswitch.ll
-Transforms/SimpleLoopUnswitch/guards.ll
-Transforms/SimpleLoopUnswitch/inject-invariant-conditions-exponential.ll
-Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
-Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
-Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll
-Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
-Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
-Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll
-Transforms/SimpleLoopUnswitch/partial-unswitch.ll
-Transforms/SimpleLoopUnswitch/partial-unswitch-loop-and-block-dispositions.ll
-Transforms/SimpleLoopUnswitch/partial-unswitch-mssa-threshold.ll
-Transforms/SimpleLoopUnswitch/partial-unswitch-update-memoryssa.ll
-Transforms/SimpleLoopUnswitch/pr138509.ll
-Transforms/SimpleLoopUnswitch/pr59546.ll
-Transforms/SimpleLoopUnswitch/pr60736.ll
-Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
-Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
-Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
Transforms/StructurizeCFG/callbr.ll
Transforms/StructurizeCFG/hoist-zerocost.ll
diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md
index a920d57c7cd2..8d20b496cd3a 100644
--- a/mlir/docs/PassManagement.md
+++ b/mlir/docs/PassManagement.md
@@ -835,6 +835,12 @@ each pass, the generator produces a `registerPassName` where
generates a `registerGroupPasses`, where `Group` is the tag provided via the
`-name` input parameter, that registers all of the passes present.
+These declarations can be enabled for the whole group of passes by
+defining the `GEN_PASS_REGISTRATION` macro, or on a per-pass basis by
+defining `GEN_PASS_REGISTRATION_PASSNAME`, where `PASSNAME` is the
+uppercase name of the pass (mirroring the per-pass `GEN_PASS_DECL_PASSNAME`
+and `GEN_PASS_DEF_PASSNAME` macros).
+
```c++
// Tablegen options: -gen-pass-decls -name="Example"
diff --git a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
index 1e56810ff7aa..7420412f0936 100644
--- a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
+++ b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
@@ -328,7 +328,6 @@ static bool traverseRegionGraph(Region *begin,
<< nextRegion->getRegionNumber() << ", returning true";
return true;
}
- llvm::dbgs() << "Region: " << nextRegion << "\n";
if (!nextRegion->getParentOp()) {
llvm::errs() << "Region " << *nextRegion << " has no parent op\n";
return false;
diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp
index f7134ce02b72..f4b8eb43b49b 100644
--- a/mlir/tools/mlir-tblgen/PassGen.cpp
+++ b/mlir/tools/mlir-tblgen/PassGen.cpp
@@ -57,19 +57,23 @@ const char *const passRegistrationCode = R"(
//===----------------------------------------------------------------------===//
// {0} Registration
//===----------------------------------------------------------------------===//
+#ifdef {1}
inline void register{0}() {{
::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {{
- return {1};
+ return {2};
});
}
// Old registration code, kept for temporary backwards compatibility.
inline void register{0}Pass() {{
::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {{
- return {1};
+ return {2};
});
}
+
+#undef {1}
+#endif // {1}
)";
/// The code snippet used to generate a function to register all passes in a
@@ -116,6 +120,10 @@ static std::string getPassDeclVarName(const Pass &pass) {
return "GEN_PASS_DECL_" + pass.getDef()->getName().upper();
}
+/// Return the name of the preprocessor macro that guards the generated
+/// registration code for `pass`: "GEN_PASS_REGISTRATION_" followed by the
+/// upper-cased name of the pass definition.
+static std::string getPassRegistrationVarName(const Pass &pass) {
+ return "GEN_PASS_REGISTRATION_" + pass.getDef()->getName().upper();
+}
+
/// Emit the code to be included in the public header of the pass.
static void emitPassDecls(const Pass &pass, raw_ostream &os) {
StringRef passName = pass.getDef()->getName();
@@ -143,18 +151,25 @@ static void emitPassDecls(const Pass &pass, raw_ostream &os) {
/// PassRegistry.
static void emitRegistrations(llvm::ArrayRef<Pass> passes, raw_ostream &os) {
os << "#ifdef GEN_PASS_REGISTRATION\n";
+ os << "// Generate registrations for all passes.\n";
+ for (const Pass &pass : passes)
+ os << "#define " << getPassRegistrationVarName(pass) << "\n";
+ os << "#endif // GEN_PASS_REGISTRATION\n";
for (const Pass &pass : passes) {
+ std::string passName = pass.getDef()->getName().str();
+ std::string passEnableVarName = getPassRegistrationVarName(pass);
+
std::string constructorCall;
if (StringRef constructor = pass.getConstructor(); !constructor.empty())
constructorCall = constructor.str();
else
- constructorCall = formatv("create{0}()", pass.getDef()->getName()).str();
-
- os << formatv(passRegistrationCode, pass.getDef()->getName(),
+ constructorCall = formatv("create{0}()", passName).str();
+ os << formatv(passRegistrationCode, passName, passEnableVarName,
constructorCall);
}
+ os << "#ifdef GEN_PASS_REGISTRATION\n";
os << formatv(passGroupRegistrationCode, groupName);
for (const Pass &pass : passes)