summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Instrumentation
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation')
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 4
-rw-r--r-- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 48
-rw-r--r-- llvm/lib/Transforms/Instrumentation/MemProfUse.cpp 120
-rw-r--r-- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp 190
4 files changed, 270 insertions, 92 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 50258af5e26c..42c3d4a4f4c4 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1219,7 +1219,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout());
// Check that size is known and can be stored in IntptrTy.
- if (!Size || !ConstantInt::isValueValidForType(IntptrTy, *Size))
+ // TODO: Add support for scalable vectors if possible.
+ if (!Size || Size->isScalable() ||
+ !ConstantInt::isValueValidForType(IntptrTy, *Size))
return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 66cdbfcf998c..832592e7663b 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -212,6 +212,15 @@ static cl::opt<float>
"OR because of the hot percentile cutoff, if "
"both are supplied."));
+static cl::opt<bool> ClStaticLinking(
+ "hwasan-static-linking",
+ cl::desc("Don't use .note.hwasan.globals section to instrument globals "
+ "from loadable libraries. "
+ "Note: in static binaries, the global variables section can be "
+ "accessed directly via linker-provided "
+ "__start_hwasan_globals and __stop_hwasan_globals symbols"),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");
@@ -335,6 +344,7 @@ private:
FunctionAnalysisManager &FAM) const;
void initializeModule();
void createHwasanCtorComdat();
+ void createHwasanNote();
void initializeCallbacks(Module &M);
@@ -533,20 +543,7 @@ void HWAddressSanitizerPass::printPipeline(
OS << '>';
}
-void HWAddressSanitizer::createHwasanCtorComdat() {
- std::tie(HwasanCtorFunction, std::ignore) =
- getOrCreateSanitizerCtorAndInitFunctions(
- M, kHwasanModuleCtorName, kHwasanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{},
- // This callback is invoked when the functions are created the first
- // time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, FunctionCallee) {
- Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
- Ctor->setComdat(CtorComdat);
- appendToGlobalCtors(M, Ctor, 0, Ctor);
- });
-
+void HWAddressSanitizer::createHwasanNote() {
// Create a note that contains pointers to the list of global
// descriptors. Adding a note to the output file will cause the linker to
// create a PT_NOTE program header pointing to the note that we can use to
@@ -630,6 +627,29 @@ void HWAddressSanitizer::createHwasanCtorComdat() {
appendToCompilerUsed(M, Dummy);
}
+void HWAddressSanitizer::createHwasanCtorComdat() {
+ std::tie(HwasanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kHwasanModuleCtorName, kHwasanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ Ctor->setComdat(CtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
+
+ // Do not create .note.hwasan.globals for static binaries, as it is only
+ // needed for instrumenting globals from dynamic libraries. In static
+ // binaries, the global variables section can be accessed directly via the
+ // __start_hwasan_globals and __stop_hwasan_globals symbols inserted by the
+ // linker.
+ if (!ClStaticLinking)
+ createHwasanNote();
+}
+
/// Module-level initialization.
///
/// inserts a call to __hwasan_init to the module's constructor list.
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index a9a0731f16d9..ecb2f2dbc552 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProfCommon.h"
@@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
+static cl::opt<bool> AnnotateStaticDataSectionPrefix(
+ "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
+ cl::desc("If true, annotate the static data section prefix"));
+
// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
@@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs,
"Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
"Number of matched memory profile callsites.");
+STATISTIC(NumOfMemProfHotGlobalVars,
+ "Number of global vars annotated with 'hot' section prefix.");
+STATISTIC(NumOfMemProfColdGlobalVars,
+ "Number of global vars annotated with 'unlikely' section prefix.");
+STATISTIC(NumOfMemProfUnknownGlobalVars,
+ "Number of global vars with unknown hotness (no section prefix).");
+STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
+ "Number of global vars with user-specified section (not annotated).");
static void addCallsiteMetadata(Instruction &I,
ArrayRef<uint64_t> InlinedCallStack,
@@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
}
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
- // Return immediately if the module doesn't contain any function.
- if (M.empty())
+ // Return immediately if the module doesn't contain any function or global
+ // variables.
+ if (M.empty() && M.globals().empty())
return PreservedAnalyses::all();
- LLVM_DEBUG(dbgs() << "Read in memory profile:");
+ LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
auto &Ctx = M.getContext();
auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
if (Error E = ReaderOrErr.takeError()) {
@@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}
+ const bool Changed =
+ annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
+
+ // If the module doesn't contain any function, return after we process all
+ // global variables.
+ if (M.empty())
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
@@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
+
+// Returns true iff the global variable has a custom section, set either by
+// __attribute__((section("name")))
+// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
+// or #pragma clang section directives
+// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
+static bool hasExplicitSectionName(const GlobalVariable &GVar) {
+ if (GVar.hasSection())
+ return true;
+
+ auto Attrs = GVar.getAttributes();
+ if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
+ Attrs.hasAttribute("relro-section") ||
+ Attrs.hasAttribute("rodata-section"))
+ return true;
+ return false;
+}
+
+bool MemProfUsePass::annotateGlobalVariables(
+ Module &M, const memprof::DataAccessProfData *DataAccessProf) {
+ if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
+ return false;
+
+ if (!DataAccessProf) {
+ M.getContext().diagnose(DiagnosticInfoPGOProfile(
+ MemoryProfileFileName.data(),
+ StringRef("Data access profiles not found in memprof. Ignore "
+ "-memprof-annotate-static-data-prefix."),
+ DS_Warning));
+ return false;
+ }
+
+ bool Changed = false;
+ // Iterate all global variables in the module and annotate them based on
+ // data access profiles. Note it's up to the linker to decide how to map input
+ // sections to output sections, and one conservative practice is to map
+ // unlikely-prefixed ones to unlikely output section, and map the rest
+ // (hot-prefixed or prefix-less) to the canonical output section.
+ for (GlobalVariable &GVar : M.globals()) {
+ assert(!GVar.getSectionPrefix().has_value() &&
+ "GVar shouldn't have section prefix yet");
+ if (GVar.isDeclarationForLinker())
+ continue;
+
+ if (hasExplicitSectionName(GVar)) {
+ ++NumOfMemProfExplicitSectionGlobalVars;
+ LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
+ << " has explicit section name. Skip annotating.\n");
+ continue;
+ }
+
+ StringRef Name = GVar.getName();
+ // Skip string literals as their mangled names don't stay stable across
+ // binary releases.
+ // TODO: Track string content hash in the profiles and compute it inside the
+ // compiler to categorize the hotness of string literals.
+ if (Name.starts_with(".str")) {
+
+ LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
+ continue;
+ }
+
+ // DataAccessProfData's get* methods will canonicalize the name under the
+ // hood before looking it up, so the optimizer doesn't need to do it.
+ std::optional<DataAccessProfRecord> Record =
+ DataAccessProf->getProfileRecord(Name);
+ // Annotate a global variable as hot if it has a non-zero sampled count, and
+ // annotate it as cold if it's seen in the profiled binary file but doesn't
+ // have any access samples.
+ // For logging, optimization remark emitter requires a llvm::Function, but
+ // it's not well defined how to associate a global variable with a function.
+ // So we just print out the static data section prefix in LLVM_DEBUG.
+ if (Record && Record->AccessCount > 0) {
+ ++NumOfMemProfHotGlobalVars;
+ GVar.setSectionPrefix("hot");
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name
+ << " is annotated as hot\n");
+ } else if (DataAccessProf->isKnownColdSymbol(Name)) {
+ ++NumOfMemProfColdGlobalVars;
+ GVar.setSectionPrefix("unlikely");
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name
+ << " is annotated as unlikely\n");
+ } else {
+ ++NumOfMemProfUnknownGlobalVars;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 27292d1a66c3..9899a2aae2b1 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3263,7 +3263,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return true;
}
- /// Heuristically instrument unknown intrinsics.
+ /// Returns whether it was able to heuristically instrument unknown
+ /// intrinsics.
///
/// The main purpose of this code is to do something reasonable with all
/// random intrinsics we might encounter, most importantly - SIMD intrinsics.
@@ -3273,7 +3274,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// We special-case intrinsics where this approach fails. See llvm.bswap
/// handling as an example of that.
- bool handleUnknownIntrinsicUnlogged(IntrinsicInst &I) {
+ bool maybeHandleUnknownIntrinsicUnlogged(IntrinsicInst &I) {
unsigned NumArgOperands = I.arg_size();
if (NumArgOperands == 0)
return false;
@@ -3300,8 +3301,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return false;
}
- bool handleUnknownIntrinsic(IntrinsicInst &I) {
- if (handleUnknownIntrinsicUnlogged(I)) {
+ bool maybeHandleUnknownIntrinsic(IntrinsicInst &I) {
+ if (maybeHandleUnknownIntrinsicUnlogged(I)) {
if (ClDumpHeuristicInstructions)
dumpInst(I);
@@ -3860,7 +3861,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
//
// Three operands:
// <4 x i32> @llvm.x86.avx512.vpdpbusd.128
- // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
+ // (<4 x i32> %s, <16 x i8> %a, <16 x i8> %b)
// (this is equivalent to multiply-add on %a and %b, followed by
// adding/"accumulating" %s. "Accumulation" stores the result in one
// of the source registers, but this accumulate vs. add distinction
@@ -3902,15 +3903,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ReturnType->getPrimitiveSizeInBits());
if (I.arg_size() == 3) {
- assert(ParamType == ReturnType);
- assert(ParamType == I.getArgOperand(0)->getType());
+ [[maybe_unused]] auto *AccumulatorType =
+ cast<FixedVectorType>(I.getOperand(0)->getType());
+ assert(AccumulatorType == ReturnType);
}
FixedVectorType *ImplicitReturnType = ReturnType;
// Step 1: instrument multiplication of corresponding vector elements
if (EltSizeInBits) {
- ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
- EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
+ ImplicitReturnType = cast<FixedVectorType>(
+ getMMXVectorTy(EltSizeInBits * ReductionFactor,
+ ParamType->getPrimitiveSizeInBits()));
ParamType = cast<FixedVectorType>(
getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
@@ -3958,7 +3961,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Step 2: instrument horizontal add
// We don't need bit-precise horizontalReduce because we only want to check
- // if each pair of elements is fully zero.
+ // if each pair/quad of elements is fully zero.
// Cast to <4 x i32>.
Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
@@ -3968,7 +3971,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant::getNullValue(Horizontal->getType())),
ImplicitReturnType);
- // Cast it back to the required fake return type (<1 x i64>).
+ // Cast it back to the required fake return type (if MMX: <1 x i64>; for
+ // AVX, it is already correct).
if (EltSizeInBits)
OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
@@ -5262,7 +5266,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleShadowOr(I);
}
- void visitIntrinsicInst(IntrinsicInst &I) {
+ bool maybeHandleCrossPlatformIntrinsic(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
@@ -5342,6 +5346,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceWithStarterIntrinsic(I);
break;
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp: {
+ handleShadowOr(I);
+ break;
+ }
+
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ handleFunnelShift(I);
+ break;
+
+ case Intrinsic::is_constant:
+ // The result of llvm.is.constant() is always defined.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+ }
+
+ bool maybeHandleX86SIMDIntrinsic(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
case Intrinsic::x86_sse_stmxcsr:
handleStmxcsr(I);
break;
@@ -5392,6 +5422,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Convert Packed Single Precision Floating-Point Values
+ // to Packed Signed Doubleword Integer Values
+ //
+ // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float>, <16 x i32>, i16, i32)
+ case Intrinsic::x86_avx512_mask_cvtps2dq_512:
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
+ break;
+
// Convert Packed Double Precision Floating-Point Values
// to Packed Single Precision Floating-Point Values
case Intrinsic::x86_sse2_cvtpd2ps:
@@ -5492,23 +5531,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_mmx_psrli_q:
case Intrinsic::x86_mmx_psrai_w:
case Intrinsic::x86_mmx_psrai_d:
- case Intrinsic::aarch64_neon_rshrn:
- case Intrinsic::aarch64_neon_sqrshl:
- case Intrinsic::aarch64_neon_sqrshrn:
- case Intrinsic::aarch64_neon_sqrshrun:
- case Intrinsic::aarch64_neon_sqshl:
- case Intrinsic::aarch64_neon_sqshlu:
- case Intrinsic::aarch64_neon_sqshrn:
- case Intrinsic::aarch64_neon_sqshrun:
- case Intrinsic::aarch64_neon_srshl:
- case Intrinsic::aarch64_neon_sshl:
- case Intrinsic::aarch64_neon_uqrshl:
- case Intrinsic::aarch64_neon_uqrshrn:
- case Intrinsic::aarch64_neon_uqshl:
- case Intrinsic::aarch64_neon_uqshrn:
- case Intrinsic::aarch64_neon_urshl:
- case Intrinsic::aarch64_neon_ushl:
- // Not handled here: aarch64_neon_vsli (vector shift left and insert)
handleVectorShiftIntrinsic(I, /* Variable */ false);
break;
case Intrinsic::x86_avx2_psllv_d:
@@ -5621,19 +5643,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
//
// Multiply and Add Packed Signed and Unsigned Bytes
// < 4 x i32> @llvm.x86.avx512.vpdpbusd.128
- // (< 4 x i32>, < 4 x i32>, < 4 x i32>)
+ // (< 4 x i32>, <16 x i8>, <16 x i8>)
// < 8 x i32> @llvm.x86.avx512.vpdpbusd.256
- // (< 8 x i32>, < 8 x i32>, < 8 x i32>)
+ // (< 8 x i32>, <32 x i8>, <32 x i8>)
// <16 x i32> @llvm.x86.avx512.vpdpbusd.512
- // (<16 x i32>, <16 x i32>, <16 x i32>)
+ // (<16 x i32>, <64 x i8>, <64 x i8>)
//
// Multiply and Add Unsigned and Signed Bytes With Saturation
// < 4 x i32> @llvm.x86.avx512.vpdpbusds.128
- // (< 4 x i32>, < 4 x i32>, < 4 x i32>)
+ // (< 4 x i32>, <16 x i8>, <16 x i8>)
// < 8 x i32> @llvm.x86.avx512.vpdpbusds.256
- // (< 8 x i32>, < 8 x i32>, < 8 x i32>)
+ // (< 8 x i32>, <32 x i8>, <32 x i8>)
// <16 x i32> @llvm.x86.avx512.vpdpbusds.512
- // (<16 x i32>, <16 x i32>, <16 x i32>)
+ // (<16 x i32>, <64 x i8>, <64 x i8>)
//
// < 4 x i32> @llvm.x86.avx2.vpdpbssd.128
// (< 4 x i32>, < 4 x i32>, < 4 x i32>)
@@ -5652,30 +5674,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
//
// These intrinsics are auto-upgraded into non-masked forms:
// <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128
- // (<4 x i32>, <4 x i32>, <4 x i32>, i8)
+ // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
// <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128
- // (<4 x i32>, <4 x i32>, <4 x i32>, i8)
+ // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
// <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256
- // (<8 x i32>, <8 x i32>, <8 x i32>, i8)
+ // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
// <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256
- // (<8 x i32>, <8 x i32>, <8 x i32>, i8)
+ // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
// <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512
- // (<16 x i32>, <16 x i32>, <16 x i32>, i16)
+ // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
// <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512
- // (<16 x i32>, <16 x i32>, <16 x i32>, i16)
+ // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
//
// <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128
- // (<4 x i32>, <4 x i32>, <4 x i32>, i8)
+ // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
// <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128
- // (<4 x i32>, <4 x i32>, <4 x i32>, i8)
+ // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
// <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256
- // (<8 x i32>, <8 x i32>, <8 x i32>, i8)
+ // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
// <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256
- // (<8 x i32>, <8 x i32>, <8 x i32>, i8)
+ // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
// <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512
- // (<16 x i32>, <16 x i32>, <16 x i32>, i16)
+ // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
// <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512
- // (<16 x i32>, <16 x i32>, <16 x i32>, i16)
+ // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
case Intrinsic::x86_avx512_vpdpbusd_128:
case Intrinsic::x86_avx512_vpdpbusd_256:
case Intrinsic::x86_avx512_vpdpbusd_512:
@@ -5930,7 +5952,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx512_max_pd_512: {
// These AVX512 variants contain the rounding mode as a trailing flag.
// Earlier variants do not have a trailing flag and are already handled
- // by maybeHandleSimpleNomemIntrinsic(I, 0) via handleUnknownIntrinsic.
+ // by maybeHandleSimpleNomemIntrinsic(I, 0) via
+ // maybeHandleUnknownIntrinsic.
[[maybe_unused]] bool Success =
maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
assert(Success);
@@ -5988,15 +6011,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*trailingVerbatimArgs=*/1);
break;
- // Convert Packed Single Precision Floating-Point Values
- // to Packed Signed Doubleword Integer Values
- //
- // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
- // (<16 x float>, <16 x i32>, i16, i32)
- case Intrinsic::x86_avx512_mask_cvtps2dq_512:
- handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
- break;
-
// AVX512 PMOV: Packed MOV, with truncation
// Precisely handled by applying the same intrinsic to the shadow
case Intrinsic::x86_avx512_mask_pmov_dw_512:
@@ -6074,15 +6088,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleAVXGF2P8Affine(I);
break;
- case Intrinsic::fshl:
- case Intrinsic::fshr:
- handleFunnelShift(I);
- break;
+ default:
+ return false;
+ }
- case Intrinsic::is_constant:
- // The result of llvm.is.constant() is always defined.
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
+ return true;
+ }
+
+ bool maybeHandleArmSIMDIntrinsic(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_rshrn:
+ case Intrinsic::aarch64_neon_sqrshl:
+ case Intrinsic::aarch64_neon_sqrshrn:
+ case Intrinsic::aarch64_neon_sqrshrun:
+ case Intrinsic::aarch64_neon_sqshl:
+ case Intrinsic::aarch64_neon_sqshlu:
+ case Intrinsic::aarch64_neon_sqshrn:
+ case Intrinsic::aarch64_neon_sqshrun:
+ case Intrinsic::aarch64_neon_srshl:
+ case Intrinsic::aarch64_neon_sshl:
+ case Intrinsic::aarch64_neon_uqrshl:
+ case Intrinsic::aarch64_neon_uqrshrn:
+ case Intrinsic::aarch64_neon_uqshl:
+ case Intrinsic::aarch64_neon_uqshrn:
+ case Intrinsic::aarch64_neon_urshl:
+ case Intrinsic::aarch64_neon_ushl:
+ // Not handled here: aarch64_neon_vsli (vector shift left and insert)
+ handleVectorShiftIntrinsic(I, /* Variable */ false);
break;
// TODO: handling max/min similarly to AND/OR may be more precise
@@ -6233,17 +6265,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
- case Intrinsic::scmp:
- case Intrinsic::ucmp: {
- handleShadowOr(I);
- break;
- }
-
default:
- if (!handleUnknownIntrinsic(I))
- visitInstruction(I);
- break;
+ return false;
}
+
+ return true;
+ }
+
+ void visitIntrinsicInst(IntrinsicInst &I) {
+ if (maybeHandleCrossPlatformIntrinsic(I))
+ return;
+
+ if (maybeHandleX86SIMDIntrinsic(I))
+ return;
+
+ if (maybeHandleArmSIMDIntrinsic(I))
+ return;
+
+ if (maybeHandleUnknownIntrinsic(I))
+ return;
+
+ visitInstruction(I);
}
void visitLibAtomicLoad(CallBase &CB) {