diff options
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation')
4 files changed, 270 insertions, 92 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 50258af5e26c..42c3d4a4f4c4 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1219,7 +1219,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout()); // Check that size is known and can be stored in IntptrTy. - if (!Size || !ConstantInt::isValueValidForType(IntptrTy, *Size)) + // TODO: Add support for scalable vectors if possible. + if (!Size || Size->isScalable() || + !ConstantInt::isValueValidForType(IntptrTy, *Size)) return; bool DoPoison = (ID == Intrinsic::lifetime_end); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 66cdbfcf998c..832592e7663b 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -212,6 +212,15 @@ static cl::opt<float> "OR because of the hot percentile cutoff, if " "both are supplied.")); +static cl::opt<bool> ClStaticLinking( + "hwasan-static-linking", + cl::desc("Don't use .note.hwasan.globals section to instrument globals " + "from loadable libraries. 
" + "Note: in static binaries, the global variables section can be " + "accessed directly via linker-provided " + "__start_hwasan_globals and __stop_hwasan_globals symbols"), + cl::Hidden, cl::init(false)); + STATISTIC(NumTotalFuncs, "Number of total funcs"); STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs"); STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS"); @@ -335,6 +344,7 @@ private: FunctionAnalysisManager &FAM) const; void initializeModule(); void createHwasanCtorComdat(); + void createHwasanNote(); void initializeCallbacks(Module &M); @@ -533,20 +543,7 @@ void HWAddressSanitizerPass::printPipeline( OS << '>'; } -void HWAddressSanitizer::createHwasanCtorComdat() { - std::tie(HwasanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kHwasanModuleCtorName, kHwasanInitName, - /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName); - Ctor->setComdat(CtorComdat); - appendToGlobalCtors(M, Ctor, 0, Ctor); - }); - +void HWAddressSanitizer::createHwasanNote() { // Create a note that contains pointers to the list of global // descriptors. Adding a note to the output file will cause the linker to // create a PT_NOTE program header pointing to the note that we can use to @@ -630,6 +627,29 @@ void HWAddressSanitizer::createHwasanCtorComdat() { appendToCompilerUsed(M, Dummy); } +void HWAddressSanitizer::createHwasanCtorComdat() { + std::tie(HwasanCtorFunction, std::ignore) = + getOrCreateSanitizerCtorAndInitFunctions( + M, kHwasanModuleCtorName, kHwasanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. 
Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { + Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName); + Ctor->setComdat(CtorComdat); + appendToGlobalCtors(M, Ctor, 0, Ctor); + }); + + // Do not create .note.hwasan.globals for static binaries, as it is only + // needed for instrumenting globals from dynamic libraries. In static + // binaries, the global variables section can be accessed directly via the + // __start_hwasan_globals and __stop_hwasan_globals symbols inserted by the + // linker. + if (!ClStaticLinking) + createHwasanNote(); +} + /// Module-level initialization. /// /// inserts a call to __hwasan_init to the module's constructor list. diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index a9a0731f16d9..ecb2f2dbc552 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/MemProfCommon.h" @@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); +static cl::opt<bool> AnnotateStaticDataSectionPrefix( + "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, + cl::desc("If true, annotate the static data section prefix")); + // Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfMemProfMismatch, @@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs, "Number of matched memory profile allocs."); STATISTIC(NumOfMemProfMatchedCallSites, "Number of matched memory profile callsites."); 
+STATISTIC(NumOfMemProfHotGlobalVars, + "Number of global vars annotated with 'hot' section prefix."); +STATISTIC(NumOfMemProfColdGlobalVars, + "Number of global vars annotated with 'unlikely' section prefix."); +STATISTIC(NumOfMemProfUnknownGlobalVars, + "Number of global vars with unknown hotness (no section prefix)."); +STATISTIC(NumOfMemProfExplicitSectionGlobalVars, + "Number of global vars with user-specified section (not annotated)."); static void addCallsiteMetadata(Instruction &I, ArrayRef<uint64_t> InlinedCallStack, @@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, } PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { - // Return immediately if the module doesn't contain any function. - if (M.empty()) + // Return immediately if the module doesn't contain any function or global + // variables. + if (M.empty() && M.globals().empty()) return PreservedAnalyses::all(); - LLVM_DEBUG(dbgs() << "Read in memory profile:"); + LLVM_DEBUG(dbgs() << "Read in memory profile:\n"); auto &Ctx = M.getContext(); auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); if (Error E = ReaderOrErr.takeError()) { @@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } + const bool Changed = + annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData()); + + // If the module doesn't contain any function, return after we process all + // global variables. + if (M.empty()) + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin()); @@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } + +// Returns true iff the global variable has custom section either by +// __attribute__((section("name"))) +// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) +// or #pragma clang section directives +// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). +static bool hasExplicitSectionName(const GlobalVariable &GVar) { + if (GVar.hasSection()) + return true; + + auto Attrs = GVar.getAttributes(); + if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || + Attrs.hasAttribute("relro-section") || + Attrs.hasAttribute("rodata-section")) + return true; + return false; +} + +bool MemProfUsePass::annotateGlobalVariables( + Module &M, const memprof::DataAccessProfData *DataAccessProf) { + if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) + return false; + + if (!DataAccessProf) { + M.getContext().diagnose(DiagnosticInfoPGOProfile( + MemoryProfileFileName.data(), + StringRef("Data access profiles not found in memprof. Ignore " + "-memprof-annotate-static-data-prefix."), + DS_Warning)); + return false; + } + + bool Changed = false; + // Iterate all global variables in the module and annotate them based on + // data access profiles. Note it's up to the linker to decide how to map input + // sections to output sections, and one conservative practice is to map + // unlikely-prefixed ones to unlikely output section, and map the rest + // (hot-prefixed or prefix-less) to the canonical output section. 
+ for (GlobalVariable &GVar : M.globals()) { + assert(!GVar.getSectionPrefix().has_value() && + "GVar shouldn't have section prefix yet"); + if (GVar.isDeclarationForLinker()) + continue; + + if (hasExplicitSectionName(GVar)) { + ++NumOfMemProfExplicitSectionGlobalVars; + LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() + << " has explicit section name. Skip annotating.\n"); + continue; + } + + StringRef Name = GVar.getName(); + // Skip string literals as their mangled names don't stay stable across + // binary releases. + // TODO: Track string content hash in the profiles and compute it inside the + // compiler to categorize the hotness of string literals. + if (Name.starts_with(".str")) { + + LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); + continue; + } + + // DataAccessProfRecord's get* methods will canonicalize the name under the + // hood before looking it up, so optimizer doesn't need to do it. + std::optional<DataAccessProfRecord> Record = + DataAccessProf->getProfileRecord(Name); + // Annotate a global variable as hot if it has non-zero sampled count, and + // annotate it as cold if it's seen in the profiled binary + // file but doesn't have any access sample. + // For logging, optimization remark emitter requires a llvm::Function, but + // it's not well defined how to associate a global variable with a function. + // So we just print out the static data section prefix in LLVM_DEBUG. 
+ if (Record && Record->AccessCount > 0) { + ++NumOfMemProfHotGlobalVars; + GVar.setSectionPrefix("hot"); + Changed = true; + LLVM_DEBUG(dbgs() << "Global variable " << Name + << " is annotated as hot\n"); + } else if (DataAccessProf->isKnownColdSymbol(Name)) { + ++NumOfMemProfColdGlobalVars; + GVar.setSectionPrefix("unlikely"); + Changed = true; + LLVM_DEBUG(dbgs() << "Global variable " << Name + << " is annotated as unlikely\n"); + } else { + ++NumOfMemProfUnknownGlobalVars; + LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n"); + } + } + + return Changed; +} diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 27292d1a66c3..9899a2aae2b1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3263,7 +3263,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return true; } - /// Heuristically instrument unknown intrinsics. + /// Returns whether it was able to heuristically instrument unknown + /// intrinsics. /// /// The main purpose of this code is to do something reasonable with all /// random intrinsics we might encounter, most importantly - SIMD intrinsics. @@ -3273,7 +3274,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// We special-case intrinsics where this approach fails. See llvm.bswap /// handling as an example of that. 
- bool handleUnknownIntrinsicUnlogged(IntrinsicInst &I) { + bool maybeHandleUnknownIntrinsicUnlogged(IntrinsicInst &I) { unsigned NumArgOperands = I.arg_size(); if (NumArgOperands == 0) return false; @@ -3300,8 +3301,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return false; } - bool handleUnknownIntrinsic(IntrinsicInst &I) { - if (handleUnknownIntrinsicUnlogged(I)) { + bool maybeHandleUnknownIntrinsic(IntrinsicInst &I) { + if (maybeHandleUnknownIntrinsicUnlogged(I)) { if (ClDumpHeuristicInstructions) dumpInst(I); @@ -3860,7 +3861,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // // Three operands: // <4 x i32> @llvm.x86.avx512.vpdpbusd.128 - // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b) + // (<4 x i32> %s, <16 x i8> %a, <16 x i8> %b) // (this is equivalent to multiply-add on %a and %b, followed by // adding/"accumulating" %s. "Accumulation" stores the result in one // of the source registers, but this accumulate vs. 
add distinction @@ -3902,15 +3903,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { ReturnType->getPrimitiveSizeInBits()); if (I.arg_size() == 3) { - assert(ParamType == ReturnType); - assert(ParamType == I.getArgOperand(0)->getType()); + [[maybe_unused]] auto *AccumulatorType = + cast<FixedVectorType>(I.getOperand(0)->getType()); + assert(AccumulatorType == ReturnType); } FixedVectorType *ImplicitReturnType = ReturnType; // Step 1: instrument multiplication of corresponding vector elements if (EltSizeInBits) { - ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy( - EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits())); + ImplicitReturnType = cast<FixedVectorType>( + getMMXVectorTy(EltSizeInBits * ReductionFactor, + ParamType->getPrimitiveSizeInBits())); ParamType = cast<FixedVectorType>( getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits())); @@ -3958,7 +3961,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Step 2: instrument horizontal add // We don't need bit-precise horizontalReduce because we only want to check - // if each pair of elements is fully zero. + // if each pair/quad of elements is fully zero. // Cast to <4 x i32>. Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType); @@ -3968,7 +3971,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Constant::getNullValue(Horizontal->getType())), ImplicitReturnType); - // Cast it back to the required fake return type (<1 x i64>). + // Cast it back to the required fake return type (if MMX: <1 x i64>; for + // AVX, it is already correct). 
if (EltSizeInBits) OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I)); @@ -5262,7 +5266,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleShadowOr(I); } - void visitIntrinsicInst(IntrinsicInst &I) { + bool maybeHandleCrossPlatformIntrinsic(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: @@ -5342,6 +5346,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleVectorReduceWithStarterIntrinsic(I); break; + case Intrinsic::scmp: + case Intrinsic::ucmp: { + handleShadowOr(I); + break; + } + + case Intrinsic::fshl: + case Intrinsic::fshr: + handleFunnelShift(I); + break; + + case Intrinsic::is_constant: + // The result of llvm.is.constant() is always defined. + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + break; + + default: + return false; + } + + return true; + } + + bool maybeHandleX86SIMDIntrinsic(IntrinsicInst &I) { + switch (I.getIntrinsicID()) { case Intrinsic::x86_sse_stmxcsr: handleStmxcsr(I); break; @@ -5392,6 +5422,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { break; } + // Convert Packed Single Precision Floating-Point Values + // to Packed Signed Doubleword Integer Values + // + // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512 + // (<16 x float>, <16 x i32>, i16, i32) + case Intrinsic::x86_avx512_mask_cvtps2dq_512: + handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false); + break; + // Convert Packed Double Precision Floating-Point Values // to Packed Single Precision Floating-Point Values case Intrinsic::x86_sse2_cvtpd2ps: @@ -5492,23 +5531,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_mmx_psrli_q: case Intrinsic::x86_mmx_psrai_w: case Intrinsic::x86_mmx_psrai_d: - case Intrinsic::aarch64_neon_rshrn: - case Intrinsic::aarch64_neon_sqrshl: - case Intrinsic::aarch64_neon_sqrshrn: - case 
Intrinsic::aarch64_neon_sqrshrun: - case Intrinsic::aarch64_neon_sqshl: - case Intrinsic::aarch64_neon_sqshlu: - case Intrinsic::aarch64_neon_sqshrn: - case Intrinsic::aarch64_neon_sqshrun: - case Intrinsic::aarch64_neon_srshl: - case Intrinsic::aarch64_neon_sshl: - case Intrinsic::aarch64_neon_uqrshl: - case Intrinsic::aarch64_neon_uqrshrn: - case Intrinsic::aarch64_neon_uqshl: - case Intrinsic::aarch64_neon_uqshrn: - case Intrinsic::aarch64_neon_urshl: - case Intrinsic::aarch64_neon_ushl: - // Not handled here: aarch64_neon_vsli (vector shift left and insert) handleVectorShiftIntrinsic(I, /* Variable */ false); break; case Intrinsic::x86_avx2_psllv_d: @@ -5621,19 +5643,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // // Multiply and Add Packed Signed and Unsigned Bytes // < 4 x i32> @llvm.x86.avx512.vpdpbusd.128 - // (< 4 x i32>, < 4 x i32>, < 4 x i32>) + // (< 4 x i32>, <16 x i8>, <16 x i8>) // < 8 x i32> @llvm.x86.avx512.vpdpbusd.256 - // (< 8 x i32>, < 8 x i32>, < 8 x i32>) + // (< 8 x i32>, <32 x i8>, <32 x i8>) // <16 x i32> @llvm.x86.avx512.vpdpbusd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>) + // (<16 x i32>, <64 x i8>, <64 x i8>) // // Multiply and Add Unsigned and Signed Bytes With Saturation // < 4 x i32> @llvm.x86.avx512.vpdpbusds.128 - // (< 4 x i32>, < 4 x i32>, < 4 x i32>) + // (< 4 x i32>, <16 x i8>, <16 x i8>) // < 8 x i32> @llvm.x86.avx512.vpdpbusds.256 - // (< 8 x i32>, < 8 x i32>, < 8 x i32>) + // (< 8 x i32>, <32 x i8>, <32 x i8>) // <16 x i32> @llvm.x86.avx512.vpdpbusds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>) + // (<16 x i32>, <64 x i8>, <64 x i8>) // // < 4 x i32> @llvm.x86.avx2.vpdpbssd.128 // (< 4 x i32>, < 4 x i32>, < 4 x i32>) @@ -5652,30 +5674,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // // These intrinsics are auto-upgraded into non-masked forms: // <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x 
i32>, <16 x i8>, <16 x i8>, i8) // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <16 x i8>, <16 x i8>, i8) // <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <32 x i8>, <32 x i8>, i8) // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <32 x i8>, <32 x i8>, i8) // <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <64 x i8>, <64 x i8>, i16) // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <64 x i8>, <64 x i8>, i16) // // <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <16 x i8>, <16 x i8>, i8) // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <16 x i8>, <16 x i8>, i8) // <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <32 x i8>, <32 x i8>, i8) // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <32 x i8>, <32 x i8>, i8) // <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <64 x i8>, <64 x i8>, i16) // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <64 x i8>, <64 x i8>, i16) case Intrinsic::x86_avx512_vpdpbusd_128: case Intrinsic::x86_avx512_vpdpbusd_256: case Intrinsic::x86_avx512_vpdpbusd_512: @@ -5930,7 +5952,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_avx512_max_pd_512: { // These AVX512 variants contain the rounding mode as a trailing flag. 
// Earlier variants do not have a trailing flag and are already handled - // by maybeHandleSimpleNomemIntrinsic(I, 0) via handleUnknownIntrinsic. + // by maybeHandleSimpleNomemIntrinsic(I, 0) via + // maybeHandleUnknownIntrinsic. [[maybe_unused]] bool Success = maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1); assert(Success); @@ -5988,15 +6011,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /*trailingVerbatimArgs=*/1); break; - // Convert Packed Single Precision Floating-Point Values - // to Packed Signed Doubleword Integer Values - // - // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512 - // (<16 x float>, <16 x i32>, i16, i32) - case Intrinsic::x86_avx512_mask_cvtps2dq_512: - handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false); - break; - // AVX512 PMOV: Packed MOV, with truncation // Precisely handled by applying the same intrinsic to the shadow case Intrinsic::x86_avx512_mask_pmov_dw_512: @@ -6074,15 +6088,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleAVXGF2P8Affine(I); break; - case Intrinsic::fshl: - case Intrinsic::fshr: - handleFunnelShift(I); - break; + default: + return false; + } - case Intrinsic::is_constant: - // The result of llvm.is.constant() is always defined. 
- setShadow(&I, getCleanShadow(&I)); - setOrigin(&I, getCleanOrigin()); + return true; + } + + bool maybeHandleArmSIMDIntrinsic(IntrinsicInst &I) { + switch (I.getIntrinsicID()) { + case Intrinsic::aarch64_neon_rshrn: + case Intrinsic::aarch64_neon_sqrshl: + case Intrinsic::aarch64_neon_sqrshrn: + case Intrinsic::aarch64_neon_sqrshrun: + case Intrinsic::aarch64_neon_sqshl: + case Intrinsic::aarch64_neon_sqshlu: + case Intrinsic::aarch64_neon_sqshrn: + case Intrinsic::aarch64_neon_sqshrun: + case Intrinsic::aarch64_neon_srshl: + case Intrinsic::aarch64_neon_sshl: + case Intrinsic::aarch64_neon_uqrshl: + case Intrinsic::aarch64_neon_uqrshrn: + case Intrinsic::aarch64_neon_uqshl: + case Intrinsic::aarch64_neon_uqshrn: + case Intrinsic::aarch64_neon_urshl: + case Intrinsic::aarch64_neon_ushl: + // Not handled here: aarch64_neon_vsli (vector shift left and insert) + handleVectorShiftIntrinsic(I, /* Variable */ false); break; // TODO: handling max/min similarly to AND/OR may be more precise @@ -6233,17 +6265,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { break; } - case Intrinsic::scmp: - case Intrinsic::ucmp: { - handleShadowOr(I); - break; - } - default: - if (!handleUnknownIntrinsic(I)) - visitInstruction(I); - break; + return false; } + + return true; + } + + void visitIntrinsicInst(IntrinsicInst &I) { + if (maybeHandleCrossPlatformIntrinsic(I)) + return; + + if (maybeHandleX86SIMDIntrinsic(I)) + return; + + if (maybeHandleArmSIMDIntrinsic(I)) + return; + + if (maybeHandleUnknownIntrinsic(I)) + return; + + visitInstruction(I); } void visitLibAtomicLoad(CallBase &CB) { |
