summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Grossman <aidengrossman@google.com>2025-11-07 20:31:48 +0000
committerAiden Grossman <aidengrossman@google.com>2025-11-07 20:31:48 +0000
commitecb8acd796ca75a0e5130d83955818b2762344c3 (patch)
treec6b38191b183b7ab4c29d22a8d4ba183b3d056d7
parenta67b7975eaced8df0654bff06756c8cc902723d4 (diff)
parent1f8d5d46df48720973c8312447b1c9a0165213d4 (diff)
[𝘀𝗽𝗿] changes introduced through rebaseusers/boomanaiden154/main.ci-make-premerge-uploadwrite-comments
Created using spr 1.3.7 [skip ci]
-rw-r--r--.ci/generate_test_report_lib.py6
-rwxr-xr-x.ci/monolithic-windows.sh3
-rw-r--r--.github/actions/build-container/action.yml95
-rw-r--r--.github/actions/push-container/action.yml44
-rw-r--r--.github/renovate.json9
-rw-r--r--.github/workflows/bazel-checks.yml23
-rw-r--r--.github/workflows/build-ci-container-tooling.yml97
-rw-r--r--.github/workflows/build-ci-container-windows.yml4
-rw-r--r--.github/workflows/build-ci-container.yml107
-rw-r--r--.github/workflows/build-metrics-container.yml4
-rw-r--r--.github/workflows/check-ci.yml2
-rw-r--r--.github/workflows/ci-post-commit-analyzer.yml2
-rw-r--r--.github/workflows/commit-access-review.yml2
-rw-r--r--.github/workflows/containers/github-action-ci-tooling/Dockerfile3
-rw-r--r--.github/workflows/docs.yml6
-rw-r--r--.github/workflows/email-check.yaml2
-rw-r--r--.github/workflows/gha-codeql.yml4
-rw-r--r--.github/workflows/issue-write.yml2
-rw-r--r--.github/workflows/libclang-abi-tests.yml8
-rw-r--r--.github/workflows/libclang-python-tests.yml2
-rw-r--r--.github/workflows/libcxx-build-and-test.yaml8
-rw-r--r--.github/workflows/llvm-abi-tests.yml12
-rw-r--r--.github/workflows/llvm-bugs.yml6
-rw-r--r--.github/workflows/pr-code-format.yml4
-rw-r--r--.github/workflows/pr-code-lint.yml6
-rw-r--r--.github/workflows/pr-request-release-note.yml2
-rw-r--r--.github/workflows/premerge.yaml6
-rw-r--r--.github/workflows/release-asset-audit.yml2
-rw-r--r--.github/workflows/release-binaries.yml8
-rw-r--r--.github/workflows/release-documentation.yml4
-rw-r--r--.github/workflows/release-doxygen.yml2
-rw-r--r--.github/workflows/release-sources.yml4
-rw-r--r--.github/workflows/scorecard.yml4
-rw-r--r--.github/workflows/unprivileged-download-artifact/action.yml2
-rw-r--r--bolt/lib/Core/BinaryBasicBlock.cpp2
-rw-r--r--bolt/lib/Core/BinaryContext.cpp8
-rw-r--r--bolt/lib/Core/DynoStats.cpp2
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp2
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp1
-rw-r--r--clang-tools-extra/clang-doc/BitcodeWriter.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h6
-rw-r--r--clang-tools-extra/clang-tidy/FileExtensionsSet.h6
-rw-r--r--clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h5
-rw-r--r--clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecDupCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/boost/UseToStringCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp3
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt1
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.cpp (renamed from clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp)12
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.h (renamed from clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h)16
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp5
-rw-r--r--clang-tools-extra/clang-tidy/cert/CMakeLists.txt1
-rw-r--r--clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h5
-rw-r--r--clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/objc/AssertEquals.h6
-rw-r--r--clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h6
-rw-r--r--clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp5
-rw-r--r--clang-tools-extra/clang-tidy/tool/ClangTidyMain.h5
-rw-r--r--clang-tools-extra/clang-tidy/utils/ASTUtils.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h5
-rw-r--r--clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h5
-rw-r--r--clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/ExprSequence.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/IncludeInserter.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/IncludeSorter.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/LexerUtils.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/OptionsUtils.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h6
-rw-r--r--clang-tools-extra/clang-tidy/utils/UsingInserter.h6
-rw-r--r--clang-tools-extra/clangd/ClangdLSPServer.cpp1
-rw-r--r--clang-tools-extra/clangd/FileDistance.cpp1
-rw-r--r--clang-tools-extra/clangd/FuzzyMatch.cpp3
-rw-r--r--clang-tools-extra/clangd/index/SymbolLocation.cpp3
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst9
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst11
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/cert/oop58-cpp.rst13
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/list.rst2
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/bugprone/copy-constructor-mutates-argument.cpp (renamed from clang-tools-extra/test/clang-tidy/checkers/cert/oop58-cpp.cpp)2
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/use-concise-preprocessor-directives.cpp8
-rw-r--r--clang/Maintainers.rst2
-rw-r--r--clang/bindings/python/clang/cindex.py174
-rw-r--r--clang/cmake/caches/Fuchsia-stage2.cmake5
-rw-r--r--clang/docs/AddressSanitizer.rst6
-rw-r--r--clang/docs/BlockLanguageSpec.rst2
-rw-r--r--clang/docs/BoundsSafety.rst4
-rw-r--r--clang/docs/BoundsSafetyImplPlans.rst2
-rw-r--r--clang/docs/ClangFormatStyleOptions.rst10
-rw-r--r--clang/docs/ClangLinkerWrapper.rst2
-rw-r--r--clang/docs/ClangNVLinkWrapper.rst4
-rw-r--r--clang/docs/ClangPlugins.rst2
-rw-r--r--clang/docs/ClangTools.rst2
-rw-r--r--clang/docs/ConstantInterpreter.rst2
-rw-r--r--clang/docs/ControlFlowIntegrity.rst2
-rw-r--r--clang/docs/DataFlowSanitizer.rst2
-rw-r--r--clang/docs/HardwareAssistedAddressSanitizerDesign.rst2
-rw-r--r--clang/docs/JSONCompilationDatabase.rst2
-rw-r--r--clang/docs/LibASTImporter.rst4
-rw-r--r--clang/docs/LibASTMatchers.rst2
-rw-r--r--clang/docs/LibASTMatchersTutorial.rst2
-rw-r--r--clang/docs/LibFormat.rst2
-rw-r--r--clang/docs/MatrixTypes.rst2
-rw-r--r--clang/docs/MemorySanitizer.rst2
-rw-r--r--clang/docs/Modules.rst2
-rw-r--r--clang/docs/ReleaseNotes.rst15
-rw-r--r--clang/include/clang/AST/JSONNodeDumper.h2
-rw-r--r--clang/include/clang/AST/Stmt.h20
-rw-r--r--clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h40
-rw-r--r--clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h2
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.td10
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td2
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td11
-rw-r--r--clang/include/clang/Basic/arm_neon.td73
-rw-r--r--clang/include/clang/Format/Format.h12
-rw-r--r--clang/include/clang/Sema/Sema.h131
-rw-r--r--clang/lib/AST/ByteCode/Compiler.cpp2
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp36
-rw-r--r--clang/lib/AST/ByteCode/Program.cpp3
-rw-r--r--clang/lib/AST/ComputeDependence.cpp2
-rw-r--r--clang/lib/AST/ExprConstant.cpp48
-rw-r--r--clang/lib/AST/JSONNodeDumper.cpp21
-rw-r--r--clang/lib/Analysis/LifetimeSafety/Dataflow.h14
-rw-r--r--clang/lib/Analysis/LifetimeSafety/Facts.cpp15
-rw-r--r--clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp2
-rw-r--r--clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp1
-rw-r--r--clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp2
-rw-r--r--clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp135
-rw-r--r--clang/lib/Basic/BuiltinTargetFeatures.h2
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmt.cpp2
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp12
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp69
-rw-r--r--clang/lib/CodeGen/CodeGenPGO.cpp9
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/ARM.cpp32
-rw-r--r--clang/lib/Format/Format.cpp12
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp18
-rw-r--r--clang/lib/Frontend/CompilerInstance.cpp4
-rw-r--r--clang/lib/Headers/avx512bwintrin.h36
-rw-r--r--clang/lib/Headers/avx512dqintrin.h36
-rw-r--r--clang/lib/Headers/avx512fintrin.h22
-rw-r--r--clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h72
-rw-r--r--clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h12
-rw-r--r--clang/lib/Headers/hlsl/hlsl_intrinsics.h61
-rw-r--r--clang/lib/Headers/llvm_libc_wrappers/stdlib.h4
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp49
-rw-r--r--clang/lib/Parse/ParseExprCXX.cpp22
-rw-r--r--clang/lib/Parse/ParseStmt.cpp14
-rw-r--r--clang/lib/Sema/Sema.cpp42
-rw-r--r--clang/lib/Sema/SemaAMDGPU.cpp2
-rw-r--r--clang/lib/Sema/SemaBoundsSafety.cpp23
-rw-r--r--clang/lib/Sema/SemaConcept.cpp58
-rw-r--r--clang/lib/Sema/SemaDecl.cpp11
-rw-r--r--clang/lib/Sema/SemaExpr.cpp20
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp30
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp130
-rw-r--r--clang/lib/Sema/SemaTemplateDeductionGuide.cpp1
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiate.cpp182
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp9
-rw-r--r--clang/lib/Sema/SemaTemplateVariadic.cpp11
-rw-r--r--clang/lib/Sema/TreeTransform.h25
-rw-r--r--clang/test/AST/ast-dump-stmt.c2
-rw-r--r--clang/test/C/C2y/n3525.c30
-rw-r--r--clang/test/CIR/CodeGen/statement-exprs.c12
-rw-r--r--clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c205
-rw-r--r--clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c23
-rw-r--r--clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c21
-rw-r--r--clang/test/CodeGen/X86/avx512bw-builtins.c140
-rw-r--r--clang/test/CodeGen/X86/avx512dq-builtins.c108
-rw-r--r--clang/test/CodeGen/X86/avx512f-builtins.c40
-rw-r--r--clang/test/CodeGen/attr-counted-by-void-ptr-gnu.c65
-rw-r--r--clang/test/CodeGen/exprs.c9
-rw-r--r--clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl163
-rw-r--r--clang/test/Parser/lambda-misplaced-capture-default.cpp9
-rw-r--r--clang/test/Profile/Inputs/c-counter-overflows.proftext2
-rw-r--r--clang/test/Profile/Inputs/c-general.profdata.v12bin0 -> 2616 bytes
-rw-r--r--clang/test/Profile/Inputs/c-general.proftext12
-rw-r--r--clang/test/Profile/Inputs/c-unprofiled-blocks.proftext4
-rw-r--r--clang/test/Profile/Inputs/cxx-rangefor.proftext2
-rw-r--r--clang/test/Profile/Inputs/cxx-throws.proftext2
-rw-r--r--clang/test/Profile/Inputs/misexpect-switch-default.proftext2
-rw-r--r--clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext2
-rw-r--r--clang/test/Profile/c-collision.c4
-rw-r--r--clang/test/Profile/c-general.c1
-rw-r--r--clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c6
-rw-r--r--clang/test/Sema/attr-counted-by-or-null-last-field.c8
-rw-r--r--clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c6
-rw-r--r--clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c10
-rw-r--r--clang/test/Sema/attr-counted-by-struct-ptrs.c10
-rw-r--r--clang/test/Sema/attr-counted-by-void-ptr-gnu.c101
-rw-r--r--clang/test/Sema/statements.c9
-rw-r--r--clang/test/SemaCUDA/error-includes-mode.cu9
-rw-r--r--clang/test/SemaCXX/attr-mode-tmpl.cpp2
-rw-r--r--clang/test/SemaCXX/cxx23-assume.cpp13
-rw-r--r--clang/test/SemaCXX/cxx2b-consteval-propagate.cpp2
-rw-r--r--clang/test/SemaCXX/cxx2b-warn-shadow.cpp26
-rw-r--r--clang/test/SemaCXX/statements.cpp17
-rw-r--r--clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl2
-rw-r--r--clang/test/SemaTemplate/temp_arg_nontype.cpp3
-rw-r--r--clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp2
-rw-r--r--clang/tools/clang-repl/ClangRepl.cpp1
-rw-r--r--clang/unittests/Analysis/LifetimeSafetyTest.cpp7
-rw-r--r--clang/unittests/Format/ConfigParseTest.cpp11
-rw-r--r--clang/unittests/Format/FormatTestComments.cpp60
-rw-r--r--clang/www/c_status.html2
-rw-r--r--compiler-rt/CMakeLists.txt4
-rw-r--r--compiler-rt/include/profile/InstrProfData.inc2
-rw-r--r--compiler-rt/lib/builtins/assembly.h22
-rw-r--r--compiler-rt/lib/fuzzer/CMakeLists.txt1
-rw-r--r--compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S2
-rw-r--r--compiler-rt/lib/hwasan/hwasan_tag_mismatch_aarch64.S2
-rw-r--r--compiler-rt/lib/msan/tests/CMakeLists.txt1
-rw-r--r--compiler-rt/lib/orc/elfnix_tls.aarch64.S4
-rw-r--r--compiler-rt/lib/orc/sysv_reenter.arm64.S4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp78
-rw-r--r--compiler-rt/lib/tsan/CMakeLists.txt1
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S6
-rw-r--r--compiler-rt/lib/xray/xray_trampoline_AArch64.S2
-rw-r--r--compiler-rt/test/asan/TestCases/Darwin/asan-verify-module-map.cpp25
-rw-r--r--compiler-rt/test/asan/TestCases/log-path_test.cpp3
-rw-r--r--compiler-rt/test/asan/TestCases/verbose-log-path_test.cpp4
-rw-r--r--compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp2
-rw-r--r--flang-rt/include/flang-rt/runtime/io-stmt.h3
-rw-r--r--flang-rt/lib/runtime/edit-output.cpp7
-rw-r--r--flang-rt/lib/runtime/io-stmt.cpp28
-rw-r--r--flang/include/flang/Lower/OpenMP/Clauses.h2
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h51
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Passes.h4
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Passes.td16
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h57
-rw-r--r--flang/lib/Evaluate/check-expression.cpp7
-rw-r--r--flang/lib/Evaluate/common.cpp19
-rw-r--r--flang/lib/Evaluate/intrinsics-library.cpp2
-rw-r--r--flang/lib/Frontend/CompilerInvocation.cpp12
-rw-r--r--flang/lib/Lower/OpenACC.cpp312
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp2
-rw-r--r--flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp24
-rw-r--r--flang/lib/Optimizer/Builder/HLFIRTools.cpp1
-rw-r--r--flang/lib/Optimizer/OpenACC/Analysis/CMakeLists.txt22
-rw-r--r--flang/lib/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.cpp40
-rw-r--r--flang/lib/Optimizer/OpenACC/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp269
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/ACCInitializeFIRAnalyses.cpp56
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt4
-rw-r--r--flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp95
-rw-r--r--flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp25
-rw-r--r--flang/lib/Semantics/check-omp-structure.cpp2
-rwxr-xr-xflang/test/Driver/convert.f902
-rw-r--r--flang/test/Driver/do_concurrent_to_omp_cli.f904
-rw-r--r--flang/test/Driver/emit-mlir.f902
-rw-r--r--flang/test/Driver/fatal-errors-parsing.f902
-rw-r--r--flang/test/Driver/fatal-errors-semantics.f901
-rw-r--r--flang/test/Driver/flang-ld-aarch64.f902
-rw-r--r--flang/test/Driver/flang-ld-powerpc.f906
-rw-r--r--flang/test/Driver/gcc-toolchain-install-dir.f906
-rw-r--r--flang/test/Driver/large-data-threshold.f904
-rw-r--r--flang/test/Driver/lto-fatlto.f902
-rw-r--r--flang/test/Driver/mlir-debug-pass-pipeline.f908
-rw-r--r--flang/test/Driver/mlir-pass-pipeline.f908
-rw-r--r--flang/test/Driver/multiple-actions-error.f9538
-rw-r--r--flang/test/Driver/multiple-fc1-input.f904
-rw-r--r--flang/test/Driver/omp-driver-offload.f9014
-rw-r--r--flang/test/Driver/tune-cpu-fir.f902
-rw-r--r--flang/test/Driver/version-loops.f9016
-rw-r--r--flang/test/Evaluate/folding33.f902
-rw-r--r--flang/test/Integration/debug-proc-ptr-e2e.f9026
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf6
-rw-r--r--flang/test/Lower/CUDA/cuda-synchronization.cuf14
-rw-r--r--flang/test/Lower/OpenACC/acc-unstructured.f904
-rw-r--r--flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f902
-rw-r--r--flang/test/Lower/OpenMP/map-character.f9016
-rw-r--r--flang/test/Lower/OpenMP/optional-argument-map-2.f9025
-rw-r--r--flang/test/Semantics/structconst12.f9012
-rw-r--r--flang/test/Transforms/OpenACC/acc-implicit-copy-reduction.fir134
-rw-r--r--flang/test/Transforms/OpenACC/acc-implicit-data-derived-type-member.F9038
-rw-r--r--flang/test/Transforms/OpenACC/acc-implicit-data-fortran.F9079
-rw-r--r--flang/test/Transforms/OpenACC/acc-implicit-data.fir358
-rw-r--r--flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir284
-rw-r--r--flang/test/Transforms/debug-proc-ptr.fir41
-rw-r--r--flang/test/Transforms/omp-map-info-finalization.fir7
-rw-r--r--libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake2
-rw-r--r--libc/cmake/modules/cpu_features/check_MOPS.cpp5
-rw-r--r--libc/cmake/modules/cpu_features/check_SVE.cpp5
-rw-r--r--libc/cmake/modules/cpu_features/check_SVE2.cpp5
-rw-r--r--libc/config/baremetal/aarch64/entrypoints.txt6
-rw-r--r--libc/config/baremetal/arm/entrypoints.txt3
-rw-r--r--libc/config/baremetal/riscv/entrypoints.txt2
-rw-r--r--libc/src/__support/macros/properties/cpu_features.h12
-rw-r--r--libc/src/time/baremetal/CMakeLists.txt8
-rw-r--r--libc/test/src/stdlib/CMakeLists.txt1
-rw-r--r--libc/test/src/stdlib/StrfromTest.h4
-rw-r--r--libcxx/docs/ReleaseNotes/22.rst4
-rw-r--r--libcxx/include/CMakeLists.txt1
-rw-r--r--libcxx/include/__algorithm/fill.h22
-rw-r--r--libcxx/include/__algorithm/fill_n.h41
-rw-r--r--libcxx/include/__algorithm/for_each.h30
-rw-r--r--libcxx/include/__algorithm/for_each_n.h67
-rw-r--r--libcxx/include/__chrono/is_clock.h72
-rw-r--r--libcxx/include/__iterator/distance.h35
-rw-r--r--libcxx/include/__iterator/segmented_iterator.h5
-rw-r--r--libcxx/include/chrono4
-rw-r--r--libcxx/include/fstream13
-rw-r--r--libcxx/include/module.modulemap.in4
-rw-r--r--libcxx/modules/std/chrono.inc4
-rw-r--r--libcxx/test/benchmarks/streams/fstream.bench.cpp (renamed from libcxx/test/benchmarks/streams/ofstream.bench.cpp)22
-rw-r--r--libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp2
-rw-r--r--libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp2
-rw-r--r--libcxx/test/libcxx/time/time.traits/is.clock.verify.cpp36
-rw-r--r--libcxx/test/std/time/time.traits/is.clock.compile.pass.cpp230
-rw-r--r--libcxxabi/src/demangle/ItaniumDemangle.h12
-rw-r--r--libcxxabi/src/demangle/Utility.h26
-rw-r--r--libunwind/CMakeLists.txt4
-rw-r--r--libunwind/src/UnwindRegistersRestore.S2
-rw-r--r--libunwind/src/UnwindRegistersSave.S2
-rw-r--r--lldb/bindings/python/CMakeLists.txt1
-rw-r--r--lldb/bindings/python/python-swigsafecast.swig5
-rw-r--r--lldb/bindings/python/python-wrapper.swig12
-rw-r--r--lldb/examples/python/templates/scripted_frame_provider.py113
-rw-r--r--lldb/include/lldb/API/SBFrameList.h14
-rw-r--r--lldb/include/lldb/API/SBModuleSpec.h10
-rw-r--r--lldb/include/lldb/API/SBTarget.h1
-rw-r--r--lldb/include/lldb/Core/ModuleList.h4
-rw-r--r--lldb/include/lldb/Core/ModuleSpec.h18
-rw-r--r--lldb/include/lldb/Core/PluginManager.h18
-rw-r--r--lldb/include/lldb/Core/Section.h16
-rw-r--r--lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h30
-rw-r--r--lldb/include/lldb/Interpreter/ScriptInterpreter.h10
-rw-r--r--lldb/include/lldb/Symbol/ObjectFile.h6
-rw-r--r--lldb/include/lldb/Target/Platform.h16
-rw-r--r--lldb/include/lldb/Target/RemoteAwarePlatform.h6
-rw-r--r--lldb/include/lldb/Target/SyntheticFrameProvider.h156
-rw-r--r--lldb/include/lldb/lldb-forward.h6
-rw-r--r--lldb/include/lldb/lldb-private-interfaces.h9
-rw-r--r--lldb/packages/Python/lldbsuite/test/make/Makefile.rules5
-rw-r--r--lldb/source/API/SBModule.cpp4
-rw-r--r--lldb/source/API/SBModuleSpec.cpp13
-rw-r--r--lldb/source/Commands/CommandObjectTarget.cpp9
-rw-r--r--lldb/source/Core/DemangledNameInfo.cpp4
-rw-r--r--lldb/source/Core/DynamicLoader.cpp5
-rw-r--r--lldb/source/Core/ModuleList.cpp34
-rw-r--r--lldb/source/Core/PluginManager.cpp55
-rw-r--r--lldb/source/Core/Section.cpp33
-rw-r--r--lldb/source/Host/common/Editline.cpp3
-rw-r--r--lldb/source/Interpreter/ScriptInterpreter.cpp5
-rw-r--r--lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp6
-rw-r--r--lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp5
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h8
-rw-r--r--lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp31
-rw-r--r--lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp14
-rw-r--r--lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h2
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp10
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h1
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp47
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h3
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp10
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h1
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp22
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h11
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp9
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h1
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp20
-rw-r--r--lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h1
-rw-r--r--lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp3
-rw-r--r--lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp3
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedFrame.h1
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt1
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.h1
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp57
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h44
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp17
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h13
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h2
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp5
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp2
-rw-r--r--lldb/source/Target/CMakeLists.txt1
-rw-r--r--lldb/source/Target/ModuleCache.cpp2
-rw-r--r--lldb/source/Target/Platform.cpp44
-rw-r--r--lldb/source/Target/Process.cpp1
-rw-r--r--lldb/source/Target/RemoteAwarePlatform.cpp11
-rw-r--r--lldb/source/Target/SyntheticFrameProvider.cpp100
-rw-r--r--lldb/source/Target/Target.cpp23
-rw-r--r--lldb/source/Target/TargetList.cpp8
-rw-r--r--lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py33
-rw-r--r--lldb/test/API/commands/target/stop-hooks/stop_hook.py25
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/map/TestDataFormatterStdMap.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multimap/TestDataFormatterGenericMultiMap.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string/TestDataFormatterStdString.py81
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string_view/TestDataFormatterStdStringView.py78
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/tuple/TestDataFormatterStdTuple.py2
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string/TestDataFormatterStdU8String.py16
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string_view/TestDataFormatterStdU8StringView.py16
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vbool/TestDataFormatterStdVBool.py2
-rw-r--r--lldb/test/API/functionalities/gdb_remote_client/TestConnectRemoteDetach.py67
-rw-r--r--lldb/test/API/python_api/unified_section_list/Makefile5
-rw-r--r--lldb/test/API/python_api/unified_section_list/TestModuleUnifiedSectionList.py285
-rw-r--r--lldb/test/API/python_api/unified_section_list/main.cpp3
-rw-r--r--lldb/test/API/python_api/unified_section_list/main.largercomment.yaml46
-rw-r--r--lldb/test/API/python_api/unified_section_list/main.largertext.yaml46
-rw-r--r--lldb/test/API/python_api/unified_section_list/main.reversedtext.yaml45
-rw-r--r--lldb/test/API/python_api/unified_section_list/main.yaml45
-rw-r--r--lldb/test/API/terminal/TestEditline.py2
-rw-r--r--lldb/test/Shell/Commands/Inputs/sigchld.c4
-rw-r--r--lldb/test/Shell/Commands/command-list-reach-beginning-of-file.test4
-rw-r--r--lldb/tools/debugserver/source/MacOSX/MachProcess.mm6
-rw-r--r--lldb/tools/lldb-dap/Handler/ExceptionInfoRequestHandler.cpp209
-rw-r--r--lldb/tools/lldb-dap/Handler/RequestHandler.h10
-rw-r--r--lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp18
-rw-r--r--lldb/tools/lldb-dap/Protocol/ProtocolRequests.h22
-rw-r--r--lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp33
-rw-r--r--lldb/tools/lldb-dap/Protocol/ProtocolTypes.h30
-rw-r--r--lldb/unittests/Core/CMakeLists.txt1
-rw-r--r--lldb/unittests/Core/MangledTest.cpp10
-rw-r--r--lldb/unittests/Core/ModuleListTest.cpp178
-rw-r--r--lldb/unittests/DAP/CMakeLists.txt1
-rw-r--r--lldb/unittests/DAP/ProtocolRequestsTest.cpp69
-rw-r--r--lldb/unittests/DAP/ProtocolTypesTest.cpp47
-rw-r--r--lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp10
-rw-r--r--lldb/unittests/Target/LocateModuleCallbackTest.cpp20
-rw-r--r--lldb/unittests/Target/RemoteAwarePlatformTest.cpp17
-rw-r--r--lldb/unittests/TestingSupport/TestUtilities.cpp5
-rw-r--r--lldb/unittests/TestingSupport/TestUtilities.h4
-rw-r--r--llvm/docs/AMDGPUUsage.rst45
-rw-r--r--llvm/docs/LangRef.rst146
-rw-r--r--llvm/docs/ReleaseNotes.md3
-rw-r--r--llvm/include/llvm/CodeGen/Analysis.h16
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h4
-rw-r--r--llvm/include/llvm/CodeGen/LibcallLoweringInfo.h9
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGISel.h1
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGNodes.h4
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h11
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h4
-rw-r--r--llvm/include/llvm/Demangle/ItaniumDemangle.h12
-rw-r--r--llvm/include/llvm/Demangle/Utility.h26
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/ClauseT.h18
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.td12
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td7
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAArch64.td1
-rw-r--r--llvm/include/llvm/IR/PatternMatch.h36
-rw-r--r--llvm/include/llvm/ProfileData/InstrProf.h10
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfData.inc2
-rw-r--r--llvm/include/llvm/Support/Casting.h16
-rw-r--r--llvm/include/llvm/Support/TargetOpcodes.def3
-rw-r--r--llvm/include/llvm/Support/ThreadPool.h23
-rw-r--r--llvm/include/llvm/Support/thread.h4
-rw-r--r--llvm/include/llvm/Target/Target.td5
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td2
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp24
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp137
-rw-r--r--llvm/lib/CAS/UnifiedOnDiskCache.cpp2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp11
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp7
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp7
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp114
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp14
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp23
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp33
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp11
-rw-r--r--llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp6
-rw-r--r--llvm/lib/Demangle/ItaniumDemangle.cpp4
-rw-r--r--llvm/lib/IR/Verifier.cpp7
-rw-r--r--llvm/lib/Option/ArgList.cpp4
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp5
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp2
-rw-r--r--llvm/lib/Support/ThreadPool.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp16
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp30
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp12
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td16
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h2
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp20
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h14
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.td6
-rw-r--r--llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp18
-rw-r--r--llvm/lib/Target/BPF/BPFSelectionDAGInfo.h10
-rw-r--r--llvm/lib/Target/BPF/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/DirectX/DXILDataScalarization.cpp68
-rw-r--r--llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp145
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp45
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.h3
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp71
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp43
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp167
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp52
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td66
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp6
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp15
-rw-r--r--llvm/lib/Target/X86/X86.h14
-rw-r--r--llvm/lib/Target/X86/X86AvoidTrailingCall.cpp33
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp32
-rw-r--r--llvm/lib/Target/X86/X86PassRegistry.def2
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp9
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp119
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp67
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll268
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll2
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll86
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll56
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll32
-rw-r--r--llvm/test/CMakeLists.txt2
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll13
-rw-r--r--llvm/test/CodeGen/AArch64/seh-extended-spills.ll34
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-fdot.ll93
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-fixed-length-fdot.ll230
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll941
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll32
-rw-r--r--llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll130
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-args-inreg.ll604
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-argument-types.ll3547
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-c-function.ll61
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-constexpr.ll343
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll95
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-encoding.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll15
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll90
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-return-types.ll13
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-skip.ll112
-rw-r--r--llvm/test/CodeGen/AMDGPU/call-waitcnt.ll211
-rw-r--r--llvm/test/CodeGen/AMDGPU/carryout-selection.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll48
-rw-r--r--llvm/test/CodeGen/AMDGPU/optimize-compare.mir178
-rw-r--r--llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/sdiv64.ll368
-rw-r--r--llvm/test/CodeGen/AMDGPU/shlN_add.ll (renamed from llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll)372
-rw-r--r--llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll23
-rw-r--r--llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll198
-rw-r--r--llvm/test/CodeGen/AMDGPU/srem64.ll410
-rw-r--r--llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir57
-rw-r--r--llvm/test/CodeGen/AMDGPU/uaddo.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/udiv64.ll199
-rw-r--r--llvm/test/CodeGen/AMDGPU/urem64.ll296
-rw-r--r--llvm/test/CodeGen/AMDGPU/usubo.ll6
-rw-r--r--llvm/test/CodeGen/DirectX/llvm_assume.ll9
-rw-r--r--llvm/test/CodeGen/DirectX/scalarize-alloca.ll65
-rw-r--r--llvm/test/CodeGen/DirectX/scalarize-global.ll70
-rw-r--r--llvm/test/CodeGen/Generic/reloc-none.ll10
-rw-r--r--llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll372
-rw-r--r--llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll60
-rw-r--r--llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll4
-rw-r--r--llvm/test/CodeGen/Hexagon/vect-qfp.mir202
-rw-r--r--llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir97
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt1
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt1
-rw-r--r--llvm/test/CodeGen/PowerPC/vp-ld-st.ll160
-rw-r--r--llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir197
-rw-r--r--llvm/test/CodeGen/RISCV/remat.ll217
-rw-r--r--llvm/test/CodeGen/RISCV/rv64zba.ll56
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll7
-rw-r--r--llvm/test/CodeGen/RISCV/xaluo.ll42
-rw-r--r--llvm/test/CodeGen/RISCV/zicond-opts.ll130
-rw-r--r--llvm/test/CodeGen/SPIRV/ComparePointers.ll2
-rw-r--r--llvm/test/CodeGen/SPIRV/complex-constexpr.ll2
-rw-r--r--llvm/test/CodeGen/SPIRV/transcoding/ConvertPtrInGlobalInit.ll49
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-load-element.ll4
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll14
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx10_2bf16-arith.ll4
-rw-r--r--llvm/test/CodeGen/X86/bittest-big-integer.ll59
-rw-r--r--llvm/test/CodeGen/X86/narrow-add-i64.ll94
-rw-r--r--llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll185
-rw-r--r--llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td2
-rw-r--r--llvm/test/TableGen/get-named-operand-idx.td3
-rw-r--r--llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll181
-rw-r--r--llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll31
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll81
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll25
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll511
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll576
-rw-r--r--llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll80
-rw-r--r--llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll69
-rw-r--r--llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll4
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll141
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll6
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/addsub.ll6
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll37
-rw-r--r--llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll150
-rw-r--r--llvm/test/Verifier/reloc-none.ll13
-rw-r--r--llvm/test/lit.cfg.py4
-rw-r--r--llvm/test/lit.site.cfg.py.in1
-rw-r--r--llvm/test/tools/dsymutil/Inputs/typedefs-with-same-name.obin0 -> 2080 bytes
-rw-r--r--llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-fwd-declaration.test8
-rw-r--r--llvm/test/tools/dsymutil/typedefs-with-same-name.test41
-rw-r--r--llvm/test/tools/dxil-dis/llvm_assume.ll11
-rw-r--r--llvm/test/tools/llvm-cas/Inputs/oneline1
-rw-r--r--llvm/test/tools/llvm-cas/Inputs/oneline-nonewline1
-rw-r--r--llvm/test/tools/llvm-cas/action-cache.test14
-rw-r--r--llvm/test/tools/llvm-cas/cache.test14
-rw-r--r--llvm/test/tools/llvm-cas/dump.test27
-rw-r--r--llvm/test/tools/llvm-cas/lit.local.cfg2
-rw-r--r--llvm/test/tools/llvm-cas/make-blob.test41
-rw-r--r--llvm/test/tools/llvm-cas/make-node.test37
-rw-r--r--llvm/test/tools/llvm-cas/print-id.test13
-rw-r--r--llvm/test/tools/llvm-cas/validation.test31
-rw-r--r--llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt52
-rw-r--r--llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt11239
-rw-r--r--llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-reduction.s1118
-rw-r--r--llvm/test/tools/llvm-profdata/profile-version.test2
-rw-r--r--llvm/tools/llvm-cas/CMakeLists.txt17
-rw-r--r--llvm/tools/llvm-cas/Options.td63
-rw-r--r--llvm/tools/llvm-cas/llvm-cas.cpp405
-rw-r--r--llvm/unittests/IR/PatternMatch.cpp27
-rw-r--r--llvm/unittests/Option/OptionSubCommandsTest.cpp13
-rw-r--r--llvm/unittests/Support/ThreadPool.cpp14
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanTest.cpp182
-rw-r--r--llvm/utils/gn/secondary/libcxx/include/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn1
-rw-r--r--mlir/cmake/modules/AddMLIRPython.cmake4
-rw-r--r--mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h13
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td17
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td7
-rw-r--r--mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td27
-rw-r--r--mlir/include/mlir/Dialect/MemRef/IR/MemRef.h1
-rw-r--r--mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td18
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td2
-rw-r--r--mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td6
-rw-r--r--mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td8
-rw-r--r--mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h1
-rw-r--r--mlir/include/mlir/Dialect/Vector/IR/VectorOps.h1
-rw-r--r--mlir/include/mlir/Dialect/Vector/IR/VectorOps.td35
-rw-r--r--mlir/include/mlir/Interfaces/AlignmentAttrInterface.h21
-rw-r--r--mlir/include/mlir/Interfaces/AlignmentAttrInterface.td65
-rw-r--r--mlir/include/mlir/Interfaces/CMakeLists.txt1
-rw-r--r--mlir/include/mlir/TableGen/CodeGenHelpers.h24
-rw-r--r--mlir/lib/Analysis/Presburger/IntegerRelation.cpp14
-rw-r--r--mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp68
-rw-r--r--mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp19
-rw-r--r--mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp9
-rw-r--r--mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp68
-rw-r--r--mlir/lib/Interfaces/AlignmentAttrInterface.cpp13
-rw-r--r--mlir/lib/Interfaces/CMakeLists.txt2
-rw-r--r--mlir/lib/TableGen/CodeGenHelpers.cpp90
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp14
-rw-r--r--mlir/python/mlir/dialects/transform/__init__.py163
-rw-r--r--mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir6
-rw-r--r--mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir53
-rw-r--r--mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir33
-rw-r--r--mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir34
-rw-r--r--mlir/test/Dialect/MemRef/invalid.mlir16
-rw-r--r--mlir/test/Dialect/OpenACC/canonicalize.mlir27
-rw-r--r--mlir/test/Target/LLVMIR/nvvm/membar.mlir14
-rw-r--r--mlir/test/Target/SPIRV/loop.mlir7
-rw-r--r--mlir/test/Target/SPIRV/phi.mlir62
-rw-r--r--mlir/test/Target/SPIRV/selection.mlir25
-rw-r--r--mlir/test/Target/SPIRV/struct.mlir23
-rw-r--r--mlir/test/lib/Dialect/Test/TestOpDefs.cpp26
-rw-r--r--mlir/test/lib/Dialect/Test/TestOps.td15
-rw-r--r--mlir/test/mlir-tblgen/constraint-unique.td10
-rw-r--r--mlir/test/mlir-tblgen/op-attribute.td16
-rw-r--r--mlir/test/mlir-tblgen/op-properties-predicates.td2
-rw-r--r--mlir/test/mlir-tblgen/predicate.td16
-rw-r--r--mlir/test/python/dialects/transform.py215
-rw-r--r--mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp38
-rw-r--r--mlir/unittests/TableGen/CMakeLists.txt2
-rw-r--r--mlir/utils/pygments/mlir_lexer.py139
-rw-r--r--offload/include/Shared/Environment.h22
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp14
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h24
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp102
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp6
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp8
-rw-r--r--offload/test/libc/malloc_parallel.c (renamed from offload/test/offloading/malloc_parallel.c)0
-rw-r--r--offload/test/mapping/lambda_mapping.cpp2
-rw-r--r--offload/test/offloading/interop-print.c1
-rw-r--r--offload/test/offloading/malloc.c2
-rw-r--r--openmp/device/include/Allocator.h6
-rw-r--r--openmp/device/src/Allocator.cpp67
-rw-r--r--openmp/device/src/Kernel.cpp1
-rw-r--r--openmp/device/src/Misc.cpp4
-rw-r--r--openmp/device/src/State.cpp24
-rw-r--r--openmp/docs/design/Runtimes.rst1
-rw-r--r--orc-rt/include/orc-rt/Endian.h44
-rw-r--r--orc-rt/unittests/CMakeLists.txt1
-rw-r--r--orc-rt/unittests/EndianTest.cpp100
-rw-r--r--runtimes/CMakeLists.txt14
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/BUILD.bazel1
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/BUILD.bazel52
-rw-r--r--utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel19
861 files changed, 26189 insertions, 15016 deletions
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py
index 48a6be903da4..ce8262f0dc73 100644
--- a/.ci/generate_test_report_lib.py
+++ b/.ci/generate_test_report_lib.py
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Library to parse JUnit XML files and return a markdown report."""
-from typing import TypedDict
+from typing import TypedDict, Optional
import platform
from junitparser import JUnitXml, Failure
@@ -11,10 +11,12 @@ from junitparser import JUnitXml, Failure
# This data structure should match the definition in llvm-zorg in
# premerge/advisor/advisor_lib.py
+# TODO(boomanaiden154): Drop the Optional here and switch to str | None when
+# we require Python 3.10.
class FailureExplanation(TypedDict):
name: str
explained: bool
- reason: str | None
+ reason: Optional[str]
SEE_BUILD_FILE_STR = "Download the build's log file to see the details."
diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh
index 5fb8f69528e8..beaed71f49f6 100755
--- a/.ci/monolithic-windows.sh
+++ b/.ci/monolithic-windows.sh
@@ -32,8 +32,6 @@ export LD=link
# see https://github.com/llvm/llvm-project/pull/82393 and
# https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40
# for further information.
-# We limit the number of parallel compile jobs to 24 control memory
-# consumption and improve build reliability.
cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
-D LLVM_ENABLE_PROJECTS="${projects}" \
-G Ninja \
@@ -49,7 +47,6 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
-D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \
-D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \
-D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \
- -D CMAKE_CXX_FLAGS="-Wno-c++98-compat -Wno-c++14-compat -Wno-unsafe-buffer-usage -Wno-old-style-cast" \
-D LLVM_ENABLE_RUNTIMES="${runtimes}"
start-group "ninja"
diff --git a/.github/actions/build-container/action.yml b/.github/actions/build-container/action.yml
new file mode 100644
index 000000000000..595c3f8dd207
--- /dev/null
+++ b/.github/actions/build-container/action.yml
@@ -0,0 +1,95 @@
+name: Build Container
+description: >-
+ Build and test a container using the standard llvm naming scheme for containers.
+
+inputs:
+ tag:
+ description: >-
+ The tag to use for this container.
+ required: false
+ container-name:
+ description: >-
+ The name for the container.
+ required: true
+ dockerfile:
+ description: >-
+ Path to docker file.
+ required: false
+ target:
+ description: >-
+ The container target to build 'passed to podman via ---target option'
+ required: false
+ context:
+ description: >-
+ Path to context for the container build.
+ required: false
+ test-command:
+ description: >-
+ Test command to run to ensure the container is working correctly.
+ required: false
+
+runs:
+ using: "composite"
+ steps:
+ # podman is not installed by default on the ARM64 images.
+ - name: Install Podman
+ if: runner.arch == 'ARM64'
+ shell: bash
+ run: |
+ sudo apt-get install podman
+
+ - name: Build Container
+ shell: bash
+ env:
+ INPUT_TAG: ${{inputs.tag }}
+ INPUT_CONTAINER_NAME: ${{ inputs.container-name }}
+ INPUT_TARGET: ${{ inputs.target }}
+ INPUT_DOCKERFILE: ${{ inputs.dockerfile }}
+ INPUT_CONTEXT: ${{ inputs.context }}
+ id: build
+ run: |
+ env
+ tag="${INPUT_TAG:-$(git rev-parse --short=12 HEAD)}"
+
+ case "$RUNNER_ARCH" in
+ ARM64)
+ container_arch="arm64v8"
+ ;;
+ *)
+ container_arch="amd64"
+ ;;
+ esac
+
+ container_name="ghcr.io/$GITHUB_REPOSITORY_OWNER/$container_arch/$INPUT_CONTAINER_NAME:$tag"
+ container_filename="$(echo $container_name | sed -e 's/\//-/g' -e 's/:/-/g').tar"
+ if [ -n "$INPUT_TARGET" ]; then
+ podman_options="$podman_options --target $INPUT_TARGET"
+ fi
+ if [ -n "$INPUT_DOCKERFILE" ]; then
+ podman_options="$podman_options -f $INPUT_DOCKERFILE"
+ fi
+ podman_options="$podman_options ${INPUT_CONTEXT:-.}"
+ echo "Podman Options: $podman_options"
+
+ podman build -t $container_name $podman_options
+
+ podman save $container_name > $container_filename
+
+ echo "container-full-name=$container_name" >> $GITHUB_OUTPUT
+
+ - name: Create container artifact
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+ with:
+ name: ${{ inputs.container-name }}-${{ runner.arch }}
+ path: "*.tar"
+ retention-days: 14
+
+ - name: Test container
+ shell: bash
+ if: inputs.test-command
+ env:
+ INPUT_TEST_COMMAND: ${{ inputs.test-command }}
+ CONTAINER_FULL_NAME: ${{ steps.build.outputs.container-full-name }}
+ run: |
+ podman run --pull=never --rm -it $CONTAINER_FULL_NAME /usr/bin/bash -x -c "$INPUT_TEST_COMMAND"
+
diff --git a/.github/actions/push-container/action.yml b/.github/actions/push-container/action.yml
new file mode 100644
index 000000000000..087e3dcb2718
--- /dev/null
+++ b/.github/actions/push-container/action.yml
@@ -0,0 +1,44 @@
+name: Push Container
+description: >-
+ Download all container artifacts for this job and push them to the GitHub registry.
+
+inputs:
+ token:
+ description: >-
+ Token to use to authenticate with the container registry.
+ required: true
+
+runs:
+ using: "composite"
+ steps:
+ - name: Download container
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+
+ - name: Push Container
+ env:
+ GITHUB_TOKEN: ${{ inputs.token }}
+ shell: bash
+ run: |
+ function push_container {
+ image_name=$1
+ latest_name=$(echo $image_name | sed 's/:[a-f0-9]\+$/:latest/g')
+ podman tag $image_name $latest_name
+ echo "Pushing $image_name ..."
+ podman push --compression-format=zstd $image_name
+ echo "Pushing $latest_name ..."
+ podman push --compression-format=zstd $latest_name
+ }
+
+ podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io
+ for f in $(find . -iname '*.tar'); do
+ image_name=$(podman load -q -i $f | sed 's/Loaded image: //g')
+ push_container $image_name
+
+ if echo $image_name | grep '/amd64/'; then
+ # For amd64, create an alias with the arch component removed.
+ # This matches the convention used on dockerhub.
+ default_image_name=$(echo $(dirname $(dirname $image_name))/$(basename $image_name))
+ podman tag $image_name $default_image_name
+ push_container $default_image_name
+ fi
+ done
diff --git a/.github/renovate.json b/.github/renovate.json
index 6ce98c4e7b10..8e89ba8c4b32 100644
--- a/.github/renovate.json
+++ b/.github/renovate.json
@@ -8,5 +8,12 @@
"minimumReleaseAge": "3 days",
"assignees": ["boomanaiden154"],
"ignorePaths": [".github/workflows/containers/**"],
- "groupName": "[Github] Update GHA Dependencies"
+ "groupName": "[Github] Update GHA Dependencies",
+ "packageRules": [
+ {
+ "matchPackageNames": ["windows", "macos"],
+ "matchManagers": ["github-actions"],
+ "enabled": false
+ }
+ ]
}
diff --git a/.github/workflows/bazel-checks.yml b/.github/workflows/bazel-checks.yml
index 65d51649dd9e..7c3db4ed7865 100644
--- a/.github/workflows/bazel-checks.yml
+++ b/.github/workflows/bazel-checks.yml
@@ -30,3 +30,26 @@ jobs:
- name: Run Buildifier
run: |
buildifier --mode=check $(find ./utils/bazel -name *BUILD*)
+
+ bazel-build:
+ name: "Bazel Build/Test"
+ runs-on: llvm-premerge-linux-runners
+ if: github.repository == 'llvm/llvm-project'
+ steps:
+ - name: Fetch LLVM sources
+ uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+ # TODO(boomanaiden154): We should use a purpose built container for this. Move
+ # over when we have fixed the issues with using custom containers with Github
+ # ARC in GKE.
+ - name: Setup System Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y libmpfr-dev libpfm4-dev
+ sudo curl -L https://github.com/bazelbuild/bazelisk/releases/download/v1.27.0/bazelisk-amd64.deb > /tmp/bazelisk.deb
+ sudo apt-get install -y /tmp/bazelisk.deb
+ rm /tmp/bazelisk.deb
+ - name: Build/Test
+ working-directory: utils/bazel
+ run: |
+ bazelisk test --config=ci --sandbox_base="" \
+ @llvm-project//llvm/unittests:adt_tests
diff --git a/.github/workflows/build-ci-container-tooling.yml b/.github/workflows/build-ci-container-tooling.yml
index 992947eb2fff..0bb8242eb35a 100644
--- a/.github/workflows/build-ci-container-tooling.yml
+++ b/.github/workflows/build-ci-container-tooling.yml
@@ -12,17 +12,30 @@ on:
- '.github/workflows/containers/github-action-ci-tooling/**'
- llvm/utils/git/requirements_formatting.txt
- llvm/utils/git/requirements_linting.txt
+ - '.github/actions/build-container/**'
+ - '.github/actions/push-container/**'
pull_request:
paths:
- .github/workflows/build-ci-container-tooling.yml
- '.github/workflows/containers/github-action-ci-tooling/**'
- llvm/utils/git/requirements_formatting.txt
- llvm/utils/git/requirements_linting.txt
+ - '.github/actions/build-container/**'
+ - '.github/actions/push-container/**'
jobs:
build-ci-container-tooling:
+ name: Build Container ${{ matrix.container-name }}
if: github.repository_owner == 'llvm'
runs-on: ubuntu-24.04
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - container-name: code-format
+ test-command: 'cd $HOME && clang-format --version | grep version && git-clang-format -h | grep usage && black --version | grep black'
+ - container-name: code-lint
+ test-command: 'cd $HOME && clang-tidy --version | grep version && clang-tidy-diff.py -h | grep usage'
steps:
- name: Checkout LLVM
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
@@ -32,48 +45,15 @@ jobs:
llvm/utils/git/requirements_formatting.txt
llvm/utils/git/requirements_linting.txt
clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py
+ .github/actions/build-container
- - name: Write Variables
- id: vars
- run: |
- tag=$(git rev-parse --short=12 HEAD)
- container_name="ghcr.io/$GITHUB_REPOSITORY_OWNER/amd64/ci-ubuntu-24.04"
- echo "container-name-format=$container_name-code-format" >> $GITHUB_OUTPUT
- echo "container-name-lint=$container_name-code-lint" >> $GITHUB_OUTPUT
- echo "container-name-format-tag=$container_name-format:$tag" >> $GITHUB_OUTPUT
- echo "container-name-lint-tag=$container_name-lint:$tag" >> $GITHUB_OUTPUT
- echo "container-format-filename=$(echo $container_name-format:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
- echo "container-lint-filename=$(echo $container_name-lint:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
-
- - name: Build container
- run: |
- podman build --target ci-container-code-format \
- -f .github/workflows/containers/github-action-ci-tooling/Dockerfile \
- -t ${{ steps.vars.outputs.container-name-format-tag }} .
- podman build --target ci-container-code-lint \
- -f .github/workflows/containers/github-action-ci-tooling/Dockerfile \
- -t ${{ steps.vars.outputs.container-name-lint-tag }} .
-
- # Save the container so we have it in case the push fails. This also
- # allows us to separate the push step into a different job so we can
- # maintain minimal permissions while building the container.
- - name: Save container image
- run: |
- podman save ${{ steps.vars.outputs.container-name-format-tag }} > ${{ steps.vars.outputs.container-format-filename }}
- podman save ${{ steps.vars.outputs.container-name-lint-tag }} > ${{ steps.vars.outputs.container-lint-filename }}
-
- - name: Upload container image
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - name: Build Container
+ uses: ./.github/actions/build-container
with:
- name: container-amd64
- path: "*.tar"
- retention-days: 14
-
- - name: Test Container
- run: |
- # Use --pull=never to ensure we are testing the just built image.
- podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-format-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-format --version | grep version && git-clang-format -h | grep usage && black --version | grep black'
- podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-lint-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-tidy --version | grep version && clang-tidy-diff.py -h | grep usage'
+ container-name: ci-ubuntu-24.04-${{ matrix.container-name }}
+ dockerfile: .github/workflows/containers/github-action-ci-tooling/Dockerfile
+ target: ci-container-${{ matrix.container-name }}
+ test-command: ${{ matrix.test-command }}
push-ci-container:
if: github.event_name == 'push'
@@ -82,34 +62,13 @@ jobs:
permissions:
packages: write
runs-on: ubuntu-24.04
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- - name: Download container
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
-
- - name: Push Container
- run: |
- function push_container {
- image_name=$1
- latest_name=$(echo $image_name | sed 's/:[a-f0-9]\+$/:latest/g')
- podman tag $image_name $latest_name
- echo "Pushing $image_name ..."
- podman push $image_name
- echo "Pushing $latest_name ..."
- podman push $latest_name
- }
-
- podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io
- for f in $(find . -iname '*.tar'); do
- image_name=$(podman load -q -i $f | sed 's/Loaded image: //g')
- push_container $image_name
+ - name: Checkout LLVM
+ uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+ with:
+ sparse-checkout: |
+ .github/actions/push-container
- if echo $image_name | grep '/amd64/'; then
- # For amd64, create an alias with the arch component removed.
- # This matches the convention used on dockerhub.
- default_image_name=$(echo $(dirname $(dirname $image_name))/$(basename $image_name))
- podman tag $image_name $default_image_name
- push_container $default_image_name
- fi
- done
+ - uses: ./.github/actions/push-container
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build-ci-container-windows.yml b/.github/workflows/build-ci-container-windows.yml
index 14c349b1b2fe..b6c46b70030a 100644
--- a/.github/workflows/build-ci-container-windows.yml
+++ b/.github/workflows/build-ci-container-windows.yml
@@ -44,7 +44,7 @@ jobs:
run: |
docker save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }}
- name: Upload container image
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: container
path: ${{ steps.vars.outputs.container-filename }}
@@ -61,7 +61,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Download container
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: container
- name: Push Container
diff --git a/.github/workflows/build-ci-container.yml b/.github/workflows/build-ci-container.yml
index 027c558afdd0..ddb803fb969f 100644
--- a/.github/workflows/build-ci-container.yml
+++ b/.github/workflows/build-ci-container.yml
@@ -10,72 +10,46 @@ on:
paths:
- .github/workflows/build-ci-container.yml
- '.github/workflows/containers/github-action-ci/**'
+ - '.github/actions/build-container/**'
+ - '.github/actions/push-container/**'
pull_request:
paths:
- .github/workflows/build-ci-container.yml
- '.github/workflows/containers/github-action-ci/**'
+ - '.github/actions/build-container/**'
+ - '.github/actions/push-container/**'
jobs:
build-ci-container:
+ name: Build Container ${{ matrix.container-name }} ${{ (contains(matrix.runs-on, 'arm') && 'ARM64') || 'X64' }}
if: github.repository_owner == 'llvm'
runs-on: ${{ matrix.runs-on }}
strategy:
matrix:
- include:
- # The arch names should match the names used on dockerhub.
- # See https://github.com/docker-library/official-images#architectures-other-than-amd64
- - arch: amd64
- runs-on: depot-ubuntu-24.04-16
- - arch: arm64v8
- runs-on: depot-ubuntu-24.04-arm-16
+ runs-on:
+ - depot-ubuntu-24.04-16
+ - depot-ubuntu-24.04-arm-16
+ container-name:
+ - ''
+ - agent
+ test-command:
+ - cd $HOME && printf '#include <iostream>\nint main(int argc, char **argv) { std::cout << "Hello\\n"; }' | clang++ -x c++ - && ./a.out | grep Hello
steps:
- name: Checkout LLVM
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
- sparse-checkout: .github/workflows/containers/github-action-ci/
- # podman is not installed by default on the ARM64 images.
- - name: Install Podman
- if: runner.arch == 'ARM64'
- run: |
- sudo apt-get install podman
- - name: Write Variables
- id: vars
- run: |
- tag=$(git rev-parse --short=12 HEAD)
- container_name="ghcr.io/$GITHUB_REPOSITORY_OWNER/${{ matrix.arch }}/ci-ubuntu-24.04"
- echo "container-name=$container_name" >> $GITHUB_OUTPUT
- echo "container-name-agent=$container_name-agent" >> $GITHUB_OUTPUT
- echo "container-name-tag=$container_name:$tag" >> $GITHUB_OUTPUT
- echo "container-name-agent-tag=$container_name-agent:$tag" >> $GITHUB_OUTPUT
- echo "container-filename=$(echo $container_name:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
- echo "container-agent-filename=$(echo $container_name-agent:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
- - name: Build container
- working-directory: ./.github/workflows/containers/github-action-ci/
- run: |
- podman build --target ci-container -t ${{ steps.vars.outputs.container-name-tag }} .
- podman build --target ci-container-agent -t ${{ steps.vars.outputs.container-name-agent-tag }} .
+ sparse-checkout: |
+ .github/workflows/containers/github-action-ci/
+ .github/actions/build-container
- # Save the container so we have it in case the push fails. This also
- # allows us to separate the push step into a different job so we can
- # maintain minimal permissions while building the container.
- - name: Save container image
- run: |
- podman save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }}
- podman save ${{ steps.vars.outputs.container-name-agent-tag }} > ${{ steps.vars.outputs.container-agent-filename }}
-
- - name: Upload container image
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - name: Build Container
+ uses: ./.github/actions/build-container
with:
- name: container-${{ matrix.arch }}
- path: "*.tar"
- retention-days: 14
-
- - name: Test Container
- run: |
- for image in ${{ steps.vars.outputs.container-name-tag }}; do
- # Use --pull=never to ensure we are testing the just built image.
- podman run --pull=never --rm -it $image /usr/bin/bash -x -c 'cd $HOME && printf '\''#include <iostream>\nint main(int argc, char **argv) { std::cout << "Hello\\n"; }'\'' | clang++ -x c++ - && ./a.out | grep Hello'
- done
+ container-name: ci-ubuntu-24.04${{ matrix.container-name && format('-{0}', matrix.container-name)}}
+ context: .github/workflows/containers/github-action-ci/
+ dockerfile: .github/workflows/containers/github-action-ci/Dockerfile
+ target: ci-container${{ matrix.container-name && format('-{0}', matrix.container-name) }}
+ test-command: ${{ matrix.test-command }}
push-ci-container:
if: github.event_name == 'push'
@@ -87,31 +61,12 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- - name: Download container
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
-
- - name: Push Container
- run: |
- function push_container {
- image_name=$1
- latest_name=$(echo $image_name | sed 's/:[a-f0-9]\+$/:latest/g')
- podman tag $image_name $latest_name
- echo "Pushing $image_name ..."
- podman push $image_name
- echo "Pushing $latest_name ..."
- podman push $latest_name
- }
-
- podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io
- for f in $(find . -iname '*.tar'); do
- image_name=$(podman load -q -i $f | sed 's/Loaded image: //g')
- push_container $image_name
+ - name: Checkout LLVM
+ uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+ with:
+ sparse-checkout: |
+ .github/actions/push-container
- if echo $image_name | grep '/amd64/'; then
- # For amd64, create an alias with the arch component removed.
- # This matches the convention used on dockerhub.
- default_image_name=$(echo $(dirname $(dirname $image_name))/$(basename $image_name))
- podman tag $image_name $default_image_name
- push_container $default_image_name
- fi
- done
+ - uses: ./.github/actions/push-container
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build-metrics-container.yml b/.github/workflows/build-metrics-container.yml
index 69b571575f40..786c41214d85 100644
--- a/.github/workflows/build-metrics-container.yml
+++ b/.github/workflows/build-metrics-container.yml
@@ -49,7 +49,7 @@ jobs:
run: |
podman save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }}
- name: Upload Container Image
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: container
path: ${{ steps.vars.outputs.container-filename }}
@@ -66,7 +66,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Download Container
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: container
- name: Push Container
diff --git a/.github/workflows/check-ci.yml b/.github/workflows/check-ci.yml
index 6ecad5536109..7fecb010a64f 100644
--- a/.github/workflows/check-ci.yml
+++ b/.github/workflows/check-ci.yml
@@ -26,7 +26,7 @@ jobs:
with:
sparse-checkout: .ci
- name: Setup Python
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: 3.14
cache: 'pip'
diff --git a/.github/workflows/ci-post-commit-analyzer.yml b/.github/workflows/ci-post-commit-analyzer.yml
index 49cf4100dd71..59df0b68a8ad 100644
--- a/.github/workflows/ci-post-commit-analyzer.yml
+++ b/.github/workflows/ci-post-commit-analyzer.yml
@@ -87,7 +87,7 @@ jobs:
scan-build --generate-index-only build/analyzer-results
- name: Upload Results
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: analyzer-results
diff --git a/.github/workflows/commit-access-review.yml b/.github/workflows/commit-access-review.yml
index 734dc212fa64..7cdcfca53299 100644
--- a/.github/workflows/commit-access-review.yml
+++ b/.github/workflows/commit-access-review.yml
@@ -28,7 +28,7 @@ jobs:
python3 .github/workflows/commit-access-review.py $GITHUB_TOKEN
- name: Upload Triage List
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: triagers
path: triagers.log
diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
index 8aaa2e88f2ba..707bdb309b78 100644
--- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile
+++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
@@ -22,6 +22,7 @@ RUN apt-get update && \
FROM docker.io/library/ubuntu:24.04 AS base
ENV LLVM_SYSROOT=/opt/llvm
+ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
# Need nodejs for some of the GitHub actions.
# Need git for git-clang-format.
@@ -53,7 +54,6 @@ COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/cla
/llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format \
${LLVM_SYSROOT}/bin/
-ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
# Install dependencies for 'pr-code-format.yml' job
COPY llvm/utils/git/requirements_formatting.txt requirements_formatting.txt
@@ -77,7 +77,6 @@ COPY clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py ${LLVM_SYSROOT}/bin/cl
RUN ln -s ${LLVM_SYSROOT}/bin/clang-${LLVM_VERSION_MAJOR} ${LLVM_SYSROOT}/bin/clang && \
ln -s ${LLVM_SYSROOT}/bin/clang ${LLVM_SYSROOT}/bin/clang++
-ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 7374777cb759..3eb146d21dc4 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -60,7 +60,7 @@ jobs:
fetch-depth: 2
- name: Get subprojects that have doc changes
id: docs-changed-subprojects
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
+ uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
skip_initial_fetch: true
base_sha: 'HEAD~1'
@@ -95,7 +95,7 @@ jobs:
workflow:
- '.github/workflows/docs.yml'
- name: Setup Python env
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.14'
cache: 'pip'
@@ -209,7 +209,7 @@ jobs:
mkdir built-docs/flang
cp -r flang-build/docs/* built-docs/flang/
- name: Upload docs
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: docs-output
path: built-docs/
diff --git a/.github/workflows/email-check.yaml b/.github/workflows/email-check.yaml
index 981c6fa62cb1..ba625b2b3b06 100644
--- a/.github/workflows/email-check.yaml
+++ b/.github/workflows/email-check.yaml
@@ -39,7 +39,7 @@ jobs:
[{"body" : "$COMMENT"}]
EOF
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: workflow-args
diff --git a/.github/workflows/gha-codeql.yml b/.github/workflows/gha-codeql.yml
index 6d490ca2c4b2..4b9df6b66845 100644
--- a/.github/workflows/gha-codeql.yml
+++ b/.github/workflows/gha-codeql.yml
@@ -29,9 +29,9 @@ jobs:
sparse-checkout: |
.github/
- name: Initialize CodeQL
- uses: github/codeql-action/init@5d5cd550d3e189c569da8f16ea8de2d821c9bf7a # v3.31.2
+ uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
with:
languages: actions
queries: security-extended
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@5d5cd550d3e189c569da8f16ea8de2d821c9bf7a # v3.31.2
+ uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml
index 26cd60c07025..8a083f9143ec 100644
--- a/.github/workflows/issue-write.yml
+++ b/.github/workflows/issue-write.yml
@@ -40,7 +40,7 @@ jobs:
- name: 'Comment on PR'
if: steps.download-artifact.outputs.artifact-id != ''
- uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
index 432c45744abd..6377dd53d1f6 100644
--- a/.github/workflows/libclang-abi-tests.yml
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -131,7 +131,7 @@ jobs:
sed -i 's/LLVM_[0-9]\+/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi
done
- name: Upload ABI file
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: ${{ matrix.name }}
path: '*${{ matrix.ref }}.abi'
@@ -144,12 +144,12 @@ jobs:
- abi-dump
steps:
- name: Download baseline
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: build-baseline
path: build-baseline
- name: Download latest
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: build-latest
path: build-latest
@@ -165,7 +165,7 @@ jobs:
done
- name: Upload ABI Comparison
if: always()
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: compat-report-${{ github.sha }}
path: compat_reports/
diff --git a/.github/workflows/libclang-python-tests.yml b/.github/workflows/libclang-python-tests.yml
index 8fb8cec3b4f0..0d66f5d595e0 100644
--- a/.github/workflows/libclang-python-tests.yml
+++ b/.github/workflows/libclang-python-tests.yml
@@ -34,7 +34,7 @@ jobs:
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Setup Python
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: ${{ matrix.python-version }}
- name: Setup ccache
diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml
index 6c8f2cb45ee0..461b723bd736 100644
--- a/.github/workflows/libcxx-build-and-test.yaml
+++ b/.github/workflows/libcxx-build-and-test.yaml
@@ -60,7 +60,7 @@ jobs:
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: ${{ matrix.config }}-${{ matrix.cxx }}-results
@@ -105,7 +105,7 @@ jobs:
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always() # Upload artifacts even if the build or test suite fails
with:
name: ${{ matrix.config }}-${{ matrix.cxx }}-results
@@ -169,7 +169,7 @@ jobs:
env:
CC: clang-22
CXX: clang++-22
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: ${{ matrix.config }}-results
@@ -223,7 +223,7 @@ jobs:
source .venv/bin/activate
python -m pip install psutil
bash libcxx/utils/ci/run-buildbot ${{ matrix.config }}
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always() # Upload artifacts even if the build or test suite fails
with:
name: macos-${{ matrix.config }}-results
diff --git a/.github/workflows/llvm-abi-tests.yml b/.github/workflows/llvm-abi-tests.yml
index 961f1cc79389..b0c2d32d4a41 100644
--- a/.github/workflows/llvm-abi-tests.yml
+++ b/.github/workflows/llvm-abi-tests.yml
@@ -128,14 +128,14 @@ jobs:
# Remove symbol versioning from dumps, so we can compare across major versions.
sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi
- name: Upload ABI file
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: ${{ matrix.name }}
path: ${{ matrix.ref }}.abi
- name: Upload symbol list file
if: matrix.name == 'build-baseline'
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: symbol-list
path: llvm.symbols
@@ -148,17 +148,17 @@ jobs:
- abi-dump
steps:
- name: Download baseline
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: build-baseline
path: build-baseline
- name: Download latest
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: build-latest
path: build-latest
- name: Download symbol list
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: symbol-list
path: symbol-list
@@ -179,7 +179,7 @@ jobs:
abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c"
- name: Upload ABI Comparison
if: always()
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: compat-report-${{ github.sha }}
path: compat_reports/
diff --git a/.github/workflows/llvm-bugs.yml b/.github/workflows/llvm-bugs.yml
index 3274f1adf9e6..96fc553abfe3 100644
--- a/.github/workflows/llvm-bugs.yml
+++ b/.github/workflows/llvm-bugs.yml
@@ -14,13 +14,13 @@ jobs:
runs-on: ubuntu-24.04
if: github.repository == 'llvm/llvm-project'
steps:
- - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+ - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
with:
- node-version: 18
+ node-version: 24
check-latest: true
- run: npm install mailgun.js form-data
- name: Send notification
- uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
MAILGUN_API_KEY: ${{ secrets.LLVM_BUGS_KEY }}
with:
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index ac0689b4d324..dc253e4fbae9 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -27,7 +27,7 @@ jobs:
- name: Get changed files
id: changed-files
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
+ uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
separator: ","
skip_initial_fetch: true
@@ -56,7 +56,7 @@ jobs:
--end-rev HEAD \
--changed-files "$CHANGED_FILES"
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: workflow-args
diff --git a/.github/workflows/pr-code-lint.yml b/.github/workflows/pr-code-lint.yml
index 8ba937870373..5444a29c2220 100644
--- a/.github/workflows/pr-code-lint.yml
+++ b/.github/workflows/pr-code-lint.yml
@@ -27,13 +27,13 @@ jobs:
cancel-in-progress: true
steps:
- name: Fetch LLVM sources
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+ uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 2
- name: Get changed files
id: changed-files
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
+ uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
separator: ","
skip_initial_fetch: true
@@ -91,7 +91,7 @@ jobs:
--changed-files "$CHANGED_FILES"
- name: Upload results
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: workflow-args
diff --git a/.github/workflows/pr-request-release-note.yml b/.github/workflows/pr-request-release-note.yml
index 8162a8984ee5..c2dc2de65f13 100644
--- a/.github/workflows/pr-request-release-note.yml
+++ b/.github/workflows/pr-request-release-note.yml
@@ -41,7 +41,7 @@ jobs:
request-release-note \
--pr-number ${{ github.event.pull_request.number}}
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: always()
with:
name: workflow-args
diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 7f875f27097f..46c773bf7bbd 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -66,6 +66,7 @@ jobs:
continue-on-error: ${{ runner.arch == 'ARM64' }}
env:
GITHUB_TOKEN: ${{ github.token }}
+ GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
git config --global --add safe.directory '*'
@@ -112,7 +113,7 @@ jobs:
# https://github.com/actions/upload-artifact/issues/569
continue-on-error: true
if: '!cancelled()'
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: Premerge Artifacts (Linux ${{ runner.arch }})
path: artifacts/
@@ -157,6 +158,7 @@ jobs:
shell: cmd
env:
GITHUB_TOKEN: ${{ github.token }}
+ GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64
# See the comments above in the Linux job for why we define each of
@@ -169,7 +171,7 @@ jobs:
# https://github.com/actions/upload-artifact/issues/569
continue-on-error: true
if: '!cancelled()'
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: Premerge Artifacts (Windows)
path: artifacts/
diff --git a/.github/workflows/release-asset-audit.yml b/.github/workflows/release-asset-audit.yml
index 8b24948b568e..b658167d1db3 100644
--- a/.github/workflows/release-asset-audit.yml
+++ b/.github/workflows/release-asset-audit.yml
@@ -38,7 +38,7 @@ jobs:
if: >-
github.event_name != 'pull_request' &&
failure()
- uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }}
script: |
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 25f426b7814d..a4a462ae6737 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -225,7 +225,7 @@ jobs:
release_dir=`find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname 'stage2-bins'`
mv $release_dir/${{ needs.prepare.outputs.release-binary-filename }} .
- - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: ${{ runner.os }}-${{ runner.arch }}-release-binary
# Due to path differences on Windows when running in bash vs running on node,
@@ -263,14 +263,14 @@ jobs:
sparse-checkout-cone-mode: false
- name: 'Download artifact'
- uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
+ uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
pattern: '*-release-binary'
merge-multiple: true
- name: Attest Build Provenance
id: provenance
- uses: actions/attest-build-provenance@ef244123eb79f2f7a7e75d99086184180e6d0018 # v1.4.4
+ uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
with:
subject-path: ${{ needs.prepare.outputs.release-binary-filename }}
@@ -279,7 +279,7 @@ jobs:
mv ${{ steps.provenance.outputs.bundle-path }} ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
- name: Upload Build Provenance
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation
path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
diff --git a/.github/workflows/release-documentation.yml b/.github/workflows/release-documentation.yml
index 4cf973d000a4..c09ad5706671 100644
--- a/.github/workflows/release-documentation.yml
+++ b/.github/workflows/release-documentation.yml
@@ -41,7 +41,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Setup Python env
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
cache: 'pip'
cache-dependency-path: './llvm/docs/requirements.txt'
@@ -63,7 +63,7 @@ jobs:
./llvm/utils/release/build-docs.sh -release "${{ inputs.release-version }}" -no-doxygen
- name: Create Release Notes Artifact
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # 5.0.0
with:
name: release-notes
path: docs-build/html-export/
diff --git a/.github/workflows/release-doxygen.yml b/.github/workflows/release-doxygen.yml
index 79e509e5e6a8..c31319e47833 100644
--- a/.github/workflows/release-doxygen.yml
+++ b/.github/workflows/release-doxygen.yml
@@ -43,7 +43,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Setup Python env
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
cache: 'pip'
cache-dependency-path: './llvm/docs/requirements.txt'
diff --git a/.github/workflows/release-sources.yml b/.github/workflows/release-sources.yml
index 2278b96dbe24..4c47bd7575d9 100644
--- a/.github/workflows/release-sources.yml
+++ b/.github/workflows/release-sources.yml
@@ -92,14 +92,14 @@ jobs:
- name: Attest Build Provenance
if: github.event_name != 'pull_request'
id: provenance
- uses: actions/attest-build-provenance@ef244123eb79f2f7a7e75d99086184180e6d0018 # v1.4.4
+ uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
with:
subject-path: "*.xz"
- if: github.event_name != 'pull_request'
run: |
mv ${{ steps.provenance.outputs.bundle-path }} .
- name: Create Tarball Artifacts
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
path: |
*.xz
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index bd3277a8b452..05a6d98a81ba 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -49,7 +49,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: SARIF file
path: results.sarif
@@ -57,6 +57,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@b8d3b6e8af63cde30bdc382c0bc28114f4346c88 # v2.28.1
+ uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
with:
sarif_file: results.sarif
diff --git a/.github/workflows/unprivileged-download-artifact/action.yml b/.github/workflows/unprivileged-download-artifact/action.yml
index 5b50d7ce3d3f..72815b26bcf4 100644
--- a/.github/workflows/unprivileged-download-artifact/action.yml
+++ b/.github/workflows/unprivileged-download-artifact/action.yml
@@ -27,7 +27,7 @@ outputs:
runs:
using: "composite"
steps:
- - uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
+ - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
id: artifact-url
with:
script: |
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index d680850bf2ea..a6d0ca948115 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -22,8 +22,6 @@
namespace llvm {
namespace bolt {
-constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET;
-
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
return LHS.Index < RHS.Index;
}
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 7af32c8c5663..b478925a4d7b 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1010,14 +1010,12 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return Offset - StartOffset;
};
- // Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
- // in case the following zeros belong to constant island or veneer.
+ // Skip a sequence of zero bytes. For AArch64 we only skip 4's exact
+ // multiple number of zeros in case the following zeros belong to veneer.
auto skipZeros = [&]() {
const uint64_t StartOffset = Offset;
uint64_t CurrentOffset = Offset;
- for (; CurrentOffset < BF.getMaxSize() &&
- (!isAArch64() || CurrentOffset < StartOffset + 4);
- ++CurrentOffset)
+ for (; CurrentOffset < BF.getMaxSize(); ++CurrentOffset)
if ((*FunctionData)[CurrentOffset] != 0)
break;
diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp
index 1d9818777596..64a6d12b76e8 100644
--- a/bolt/lib/Core/DynoStats.cpp
+++ b/bolt/lib/Core/DynoStats.cpp
@@ -51,8 +51,6 @@ PrintDynoOpcodeStat("print-dyno-opcode-stats",
namespace llvm {
namespace bolt {
-constexpr const char *DynoStats::Desc[];
-
bool DynoStats::operator<(const DynoStats &Other) const {
return std::lexicographical_compare(
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 8554683bc3cf..cafe4bfebf19 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -159,8 +159,6 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
}
}
-constexpr uint64_t DataAggregator::KernelBaseAddr;
-
DataAggregator::~DataAggregator() { deleteTempFiles(); }
namespace {
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 77e5688781d5..ab3431ef8bd5 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -295,7 +295,6 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun(
} // namespace opts
// FIXME: implement a better way to mark sections for replacement.
-constexpr const char *RewriteInstance::SectionsToOverwrite[];
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str",
".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists",
diff --git a/clang-tools-extra/clang-doc/BitcodeWriter.cpp b/clang-tools-extra/clang-doc/BitcodeWriter.cpp
index e23511bf6369..3a7ac6e2abcd 100644
--- a/clang-tools-extra/clang-doc/BitcodeWriter.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeWriter.cpp
@@ -303,8 +303,6 @@ static const std::vector<std::pair<BlockId, std::vector<RecordId>>>
// AbbreviationMap
-constexpr unsigned char BitCodeConstants::Signature[];
-
void ClangDocBitcodeWriter::AbbreviationMap::add(RecordId RID,
unsigned AbbrevID) {
assert(RecordIdNameMap[RID] && "Unknown RecordId.");
diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h
index aaa04107a11e..60cb01f13ae2 100644
--- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h
+++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLING_EXPANDMODULARHEADERSPPCALLBACKS_H_
-#define LLVM_CLANG_TOOLING_EXPANDMODULARHEADERSPPCALLBACKS_H_
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPANDMODULARHEADERSPPCALLBACKS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPANDMODULARHEADERSPPCALLBACKS_H
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/PPCallbacks.h"
@@ -144,4 +144,4 @@ private:
} // namespace tooling
} // namespace clang
-#endif // LLVM_CLANG_TOOLING_EXPANDMODULARHEADERSPPCALLBACKS_H_
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPANDMODULARHEADERSPPCALLBACKS_H
diff --git a/clang-tools-extra/clang-tidy/FileExtensionsSet.h b/clang-tools-extra/clang-tidy/FileExtensionsSet.h
index 95c221c84da2..f97bb64ff946 100644
--- a/clang-tools-extra/clang-tidy/FileExtensionsSet.h
+++ b/clang-tools-extra/clang-tidy/FileExtensionsSet.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILE_EXTENSIONS_SET_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILE_EXTENSIONS_SET_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILEEXTENSIONSSET_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILEEXTENSIONSSET_H
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
@@ -16,4 +16,4 @@ namespace clang::tidy {
using FileExtensionsSet = llvm::SmallSet<llvm::StringRef, 5>;
} // namespace clang::tidy
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILE_EXTENSIONS_SET_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FILEEXTENSIONSSET_H
diff --git a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h
index 2ae3c00f7ee3..ac2e759c4c27 100644
--- a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h
+++ b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_ABSEILMATCHER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_ABSEILMATCHER_H
+
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include <algorithm>
@@ -57,3 +60,5 @@ AST_POLYMORPHIC_MATCHER(
}
} // namespace clang::ast_matchers
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_ABSEILMATCHER_H
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h
index b728118c3da0..f5bab53035f8 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEADDITIONCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEADDITIONCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONADDITIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONADDITIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::abseil
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEADDITIONCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONADDITIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h
index 59af8968e8b3..f5d25116b5bc 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEDOUBLECONVERSIONCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEDOUBLECONVERSIONCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONUNNECESSARYCONVERSIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONUNNECESSARYCONVERSIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::abseil
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMEDOUBLECONVERSIONCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_DURATIONUNNECESSARYCONVERSIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h
index 2911a1ad14ae..22918311398f 100644
--- a/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/NoInternalDependenciesCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPSCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPSCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPENDENCIESCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPENDENCIESCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::abseil
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPSCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_NOINTERNALDEPENDENCIESCHECK_H
diff --git a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h
index 703d9514e8c0..74a877a84d9e 100644
--- a/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARECHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARECHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARISONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARISONCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::abseil
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARECHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_TIMECOMPARISONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h
index 182d10b5539e..441cf36446c9 100644
--- a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h
+++ b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNEL_NAME_RESTRICTION_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNEL_NAME_RESTRICTION_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNELNAMERESTRICTIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNELNAMERESTRICTIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::altera
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNEL_NAME_RESTRICTION_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_KERNELNAMERESTRICTIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h
index dab3dbce5037..dcfefcb0a1b2 100644
--- a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h
+++ b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLEWORKITEMBARRIERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLEWORKITEMBARRIERCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ public:
} // namespace clang::tidy::altera
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLE_WORK_ITEM_BARRIER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_SINGLEWORKITEMBARRIERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h
index 02c4e0056ea1..5637fc8bb350 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT4_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT4_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPT4CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPT4CHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT4_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPT4CHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h
index 4540f938fd47..332a97ace91a 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPTCHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_ACCEPT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECACCEPTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.h b/clang-tools-extra/clang-tidy/android/CloexecCheck.h
index 858d96ab45b6..fc1accd5b4d4 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.h
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCHECK_H
#include "../ClangTidyCheck.h"
@@ -97,4 +97,4 @@ protected:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h
index ee2f51abf05f..d7d2b42049cd 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_CREAT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_CREAT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCREATCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCREATCHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_CREAT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECCREATCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h
index f5699685ed08..4eae507b99b1 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecDupCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_DUP_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_DUP_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECDUPCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECDUPCHECK_H
#include "CloexecCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_DUP_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECDUPCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h
index f467b87a6cf7..03a529f02a6d 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE1_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE1_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATE1CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATE1CHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE1_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATE1CHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h
index a8d17c82d457..243b9bd7b131 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATECHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_EPOLL_CREATE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECEPOLLCREATECHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h
index 646b237a663e..a018fc5deadd 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_FOPEN_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_FOPEN_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECFOPENCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECFOPENCHECK_H
#include "CloexecCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_FOPEN_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECFOPENCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h
index 3960d05e2e1f..c2e45332fd04 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT1_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT1_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINIT1CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINIT1CHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT1_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINIT1CHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h
index cb9e6820571b..cd202c2ad97f 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINITCHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_INOTIFY_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECINOTIFYINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h
index dd96ee968f3b..1a77c7fcb196 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_MEMFD_CREATE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_MEMFD_CREATE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECMEMFDCREATECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECMEMFDCREATECHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_MEMFD_CREATE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECMEMFDCREATECHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h
index d95fe21fb3e8..d30b456dcdc1 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_OPEN_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_OPEN_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECOPENCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECOPENCHECK_H
#include "CloexecCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_OPEN_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECOPENCHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h
index 496bd6b6cbbc..31653081ef98 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE2_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE2_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPE2CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPE2CHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE2_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPE2CHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h
index f0145e14eb49..721a68883dd0 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPECHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_PIPE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECPIPECHECK_H
diff --git a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h
index 0a29d7224e78..8865db3a1678 100644
--- a/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h
+++ b/clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_SOCKET_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_SOCKET_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECSOCKETCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECSOCKETCHECK_H
#include "CloexecCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::android
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXEC_SOCKET_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_CLOEXECSOCKETCHECK_H
diff --git a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h
index af87f15a1dc0..dae3f7c125db 100644
--- a/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h
+++ b/clang-tools-extra/clang-tidy/boost/UseToStringCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USE_TO_STRING_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USE_TO_STRING_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USETOSTRINGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USETOSTRINGCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::boost
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USE_TO_STRING_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USETOSTRINGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
index e1856ff24cd8..baea231f6e06 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
@@ -22,6 +22,7 @@
#include "CommandProcessorCheck.h"
#include "ComparePointerToMemberVirtualFunctionCheck.h"
#include "CopyConstructorInitCheck.h"
+#include "CopyConstructorMutatesArgumentCheck.h"
#include "CrtpConstructorAccessibilityCheck.h"
#include "DanglingHandleCheck.h"
#include "DefaultOperatorNewOnOveralignedTypeCheck.h"
@@ -141,6 +142,8 @@ public:
"bugprone-compare-pointer-to-member-virtual-function");
CheckFactories.registerCheck<CopyConstructorInitCheck>(
"bugprone-copy-constructor-init");
+ CheckFactories.registerCheck<CopyConstructorMutatesArgumentCheck>(
+ "bugprone-copy-constructor-mutates-argument");
CheckFactories.registerCheck<DanglingHandleCheck>(
"bugprone-dangling-handle");
CheckFactories.registerCheck<DefaultOperatorNewOnOveralignedTypeCheck>(
diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
index 7d2e10887dfe..aacaa6188814 100644
--- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
@@ -18,6 +18,7 @@ add_clang_library(clangTidyBugproneModule STATIC
CommandProcessorCheck.cpp
ComparePointerToMemberVirtualFunctionCheck.cpp
CopyConstructorInitCheck.cpp
+ CopyConstructorMutatesArgumentCheck.cpp
CrtpConstructorAccessibilityCheck.cpp
DanglingHandleCheck.cpp
DefaultOperatorNewOnOveralignedTypeCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
index cba1a25d9bc1..e977bc2466dc 100644
--- a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPY_CONSTRUCTOR_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPY_CONSTRUCTOR_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORINITCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPY_CONSTRUCTOR_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.cpp
index fb9d72ce6bd3..cbbb1a0070a0 100644
--- a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.cpp
@@ -6,19 +6,20 @@
//
//===----------------------------------------------------------------------===//
-#include "MutatingCopyCheck.h"
+#include "CopyConstructorMutatesArgumentCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
using namespace clang::ast_matchers;
-namespace clang::tidy::cert {
+namespace clang::tidy::bugprone {
static constexpr llvm::StringLiteral SourceDeclName = "ChangedPVD";
static constexpr llvm::StringLiteral MutatingOperatorName = "MutatingOp";
static constexpr llvm::StringLiteral MutatingCallName = "MutatingCall";
-void MutatingCopyCheck::registerMatchers(MatchFinder *Finder) {
+void CopyConstructorMutatesArgumentCheck::registerMatchers(
+ MatchFinder *Finder) {
const auto MemberExprOrSourceObject = anyOf(
memberExpr(),
declRefExpr(to(decl(equalsBoundNode(std::string(SourceDeclName))))));
@@ -60,7 +61,8 @@ void MutatingCopyCheck::registerMatchers(MatchFinder *Finder) {
this);
}
-void MutatingCopyCheck::check(const MatchFinder::MatchResult &Result) {
+void CopyConstructorMutatesArgumentCheck::check(
+ const MatchFinder::MatchResult &Result) {
if (const auto *MemberCall =
Result.Nodes.getNodeAs<CXXMemberCallExpr>(MutatingCallName))
diag(MemberCall->getBeginLoc(), "call mutates copied object");
@@ -69,4 +71,4 @@ void MutatingCopyCheck::check(const MatchFinder::MatchResult &Result) {
diag(Assignment->getBeginLoc(), "mutating copied object");
}
-} // namespace clang::tidy::cert
+} // namespace clang::tidy::bugprone
diff --git a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.h
index c211fa004120..0fed57258b0d 100644
--- a/clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorMutatesArgumentCheck.h
@@ -6,21 +6,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_MUTATINGCOPYCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_MUTATINGCOPYCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORMUTATESARGUMENTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORMUTATESARGUMENTCHECK_H
#include "../ClangTidyCheck.h"
-namespace clang::tidy::cert {
+namespace clang::tidy::bugprone {
/// Finds assignments to the copied object and its direct or indirect members
/// in copy constructors and copy assignment operators.
///
/// For the user-facing documentation see:
-/// https://clang.llvm.org/extra/clang-tidy/checks/cert/oop58-cpp.html
-class MutatingCopyCheck : public ClangTidyCheck {
+/// https://clang.llvm.org/extra/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.html
+class CopyConstructorMutatesArgumentCheck : public ClangTidyCheck {
public:
- MutatingCopyCheck(StringRef Name, ClangTidyContext *Context)
+ CopyConstructorMutatesArgumentCheck(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context) {}
bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
return LangOpts.CPlusPlus;
@@ -29,6 +29,6 @@ public:
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
};
-} // namespace clang::tidy::cert
+} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_MUTATINGCOPYCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_COPYCONSTRUCTORMUTATESARGUMENTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h
index 486562c30f79..0b71bc43057d 100644
--- a/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLING_HANDLE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLING_HANDLE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLINGHANDLECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLINGHANDLECHECK_H
#include "../ClangTidyCheck.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLING_HANDLE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DANGLINGHANDLECHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h
index e02c62a53ffa..00e4bb1e7500 100644
--- a/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMIC_STATIC_INITIALIZERS_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMIC_STATIC_INITIALIZERS_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMICSTATICINITIALIZERSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMICSTATICINITIALIZERSCHECK_H
#include "../ClangTidyCheck.h"
#include "../FileExtensionsSet.h"
@@ -30,4 +30,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMIC_STATIC_INITIALIZERS_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_DYNAMICSTATICINITIALIZERSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
index bd1e7bae57f5..31d9e85082c5 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTION_ESCAPE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTION_ESCAPE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONESCAPECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONESCAPECHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/ExceptionAnalyzer.h"
@@ -42,4 +42,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTION_ESCAPE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EXCEPTIONESCAPECHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h
index 119728d97230..ef8b4d11d651 100644
--- a/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLD_INIT_TYPE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLD_INIT_TYPE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLDINITTYPECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLDINITTYPECHECK_H
#include "../ClangTidyCheck.h"
@@ -39,4 +39,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLD_INIT_TYPE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_FOLDINITTYPECHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h
index 777e31868c96..acab7be7f33c 100644
--- a/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGER_DIVISION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGER_DIVISION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGERDIVISIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGERDIVISIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGER_DIVISION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_INTEGERDIVISIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h
index f650145203ce..c40aef339e91 100644
--- a/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedOperatorInStrlenInAllocCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_STRLEN_IN_ALLOC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_STRLEN_IN_ALLOC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDOPERATORINSTRLENINALLOCCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDOPERATORINSTRLENINALLOCCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_STRLEN_IN_ALLOC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDOPERATORINSTRLENINALLOCCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h
index e78c30cbb644..9f6504fe8a91 100644
--- a/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_ALLOC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_ALLOC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDPOINTERARITHMETICINALLOCCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDPOINTERARITHMETICINALLOCCHECK_H
#include "../ClangTidyCheck.h"
@@ -29,4 +29,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACED_OPERATOR_IN_ALLOC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MISPLACEDPOINTERARITHMETICINALLOCCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h
index 1a2d4a410b46..1c3679a893ce 100644
--- a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLE_STATEMENT_MACRO_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLE_STATEMENT_MACRO_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLESTATEMENTMACROCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLESTATEMENTMACROCHECK_H
#include "../ClangTidyCheck.h"
@@ -29,4 +29,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLE_STATEMENT_MACRO_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_MULTIPLESTATEMENTMACROCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h
index 9631c71dee64..e506e5b0315d 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWING_CONVERSIONS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWING_CONVERSIONS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWINGCONVERSIONSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWINGCONVERSIONSCHECK_H
#include "../ClangTidyCheck.h"
@@ -108,4 +108,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWING_CONVERSIONS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NARROWINGCONVERSIONSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h
index 054d5804745b..46b4e1250862 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTICPOINTERITERATIONORDERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTICPOINTERITERATIONORDERCHECK_H
#include "../ClangTidyCheck.h"
@@ -36,4 +36,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTICPOINTERITERATIONORDERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h
index a8f4ca32a0b5..cf61eb5c585f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOT_NULL_TERMINATED_RESULT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOT_NULL_TERMINATED_RESULT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOTNULLTERMINATEDRESULTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOTNULLTERMINATEDRESULTCHECK_H
#include "../ClangTidyCheck.h"
@@ -60,4 +60,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOT_NULL_TERMINATED_RESULT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NOTNULLTERMINATEDRESULTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h
index d72c86c060fb..a9cb7a6e3447 100644
--- a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIX_RETURN_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIX_RETURN_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIXRETURNCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIXRETURNCHECK_H
#include "../ClangTidyCheck.h"
@@ -23,4 +23,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIX_RETURN_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POSIXRETURNCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h
index 0d7a203a52e1..9c08e4bc3f3f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRING_CONSTRUCTOR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRING_CONSTRUCTOR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRINGCONSTRUCTORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRINGCONSTRUCTORCHECK_H
#include "../ClangTidyCheck.h"
@@ -36,4 +36,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRING_CONSTRUCTOR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_STRINGCONSTRUCTORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h
index a1f5f2bfd1a3..c45f3326733f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUS_MEMSET_USAGE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUS_MEMSET_USAGE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSMEMSETUSAGECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSMEMSETUSAGECHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUS_MEMSET_USAGE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSMEMSETUSAGECHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h
index c9a232a1b177..409122feefd0 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINED_MEMORY_MANIPULATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINED_MEMORY_MANIPULATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINEDMEMORYMANIPULATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINEDMEMORYMANIPULATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -29,4 +29,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINED_MEMORY_MANIPULATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDEFINEDMEMORYMANIPULATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h
index 18465f7353b1..c7cadbf6e165 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNDELEGATEDCONSTRUCTOR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNDELEGATEDCONSTRUCTOR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDELEGATEDCONSTRUCTORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDELEGATEDCONSTRUCTORCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNDELEGATEDCONSTRUCTOR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNDELEGATEDCONSTRUCTORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h
index 71d302f49ff9..22788177c86b 100644
--- a/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUAL_NEAR_MISS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUAL_NEAR_MISS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUALNEARMISSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUALNEARMISSCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseMap.h"
@@ -60,4 +60,4 @@ private:
} // namespace clang::tidy::bugprone
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUAL_NEAR_MISS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_VIRTUALNEARMISSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
index a1f62707b107..b0799da94761 100644
--- a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
@@ -11,6 +11,7 @@
#include "../ClangTidyModuleRegistry.h"
#include "../bugprone/BadSignalToKillThreadCheck.h"
#include "../bugprone/CommandProcessorCheck.h"
+#include "../bugprone/CopyConstructorMutatesArgumentCheck.h"
#include "../bugprone/DefaultOperatorNewOnOveralignedTypeCheck.h"
#include "../bugprone/FloatLoopCounterCheck.h"
#include "../bugprone/PointerArithmeticOnPolymorphicObjectCheck.h"
@@ -39,7 +40,6 @@
#include "../readability/EnumInitialValueCheck.h"
#include "../readability/UppercaseLiteralSuffixCheck.h"
#include "LimitedRandomnessCheck.h"
-#include "MutatingCopyCheck.h"
#include "ProperlySeededRandomGeneratorCheck.h"
#include "ThrownExceptionTypeCheck.h"
@@ -282,7 +282,8 @@ public:
"cert-oop54-cpp");
CheckFactories.registerCheck<bugprone::RawMemoryCallOnNonTrivialTypeCheck>(
"cert-oop57-cpp");
- CheckFactories.registerCheck<MutatingCopyCheck>("cert-oop58-cpp");
+ CheckFactories.registerCheck<bugprone::CopyConstructorMutatesArgumentCheck>(
+ "cert-oop58-cpp");
// C checkers
// ARR
diff --git a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
index b25576a31272..5abb47277e78 100644
--- a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
@@ -6,7 +6,6 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangTidyCERTModule STATIC
CERTTidyModule.cpp
LimitedRandomnessCheck.cpp
- MutatingCopyCheck.cpp
ProperlySeededRandomGeneratorCheck.cpp
ThrownExceptionTypeCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h
index a9d607665adb..a806cd344d21 100644
--- a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h
+++ b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITED_RANDOMNESS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITED_RANDOMNESS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITEDRANDOMNESSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITEDRANDOMNESSCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::cert
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITED_RANDOMNESS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_LIMITEDRANDOMNESSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
index 7da01cc85718..8cb2e624e050 100644
--- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
+++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLY_SEEDED_RANDOM_GENERATOR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLY_SEEDED_RANDOM_GENERATOR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLYSEEDEDRANDOMGENERATORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLYSEEDEDRANDOMGENERATORCHECK_H
#include "../ClangTidyCheck.h"
#include <string>
@@ -39,4 +39,4 @@ private:
} // namespace clang::tidy::cert
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLY_SEEDED_RANDOM_GENERATOR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CERT_PROPERLYSEEDEDRANDOMGENERATORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h
index 780b4b39254a..dc91854ee497 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACES_GLOBAL_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACES_GLOBAL_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACESGLOBALINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACESGLOBALINITCHECK_H
#include "../ClangTidyCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACES_GLOBAL_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_INTERFACESGLOBALINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h
index e4dece6a54c9..da35b530f5d3 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NO_MALLOC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NO_MALLOC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NOMALLOCCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NOMALLOCCHECK_H
#include "../ClangTidyCheck.h"
@@ -56,4 +56,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NO_MALLOC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_NOMALLOCCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h
index 462e9864a3f5..248b5c2190e0 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNING_MEMORY_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNING_MEMORY_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNINGMEMORYCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNINGMEMORYCHECK_H
#include "../ClangTidyCheck.h"
@@ -61,4 +61,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNING_MEMORY_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_OWNINGMEMORYCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h
index cea4bfacd664..2d4b40b3bfb9 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_ARRAY_TO_POINTER_DECAY_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_ARRAY_TO_POINTER_DECAY_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSARRAYTOPOINTERDECAYCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSARRAYTOPOINTERDECAYCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_ARRAY_TO_POINTER_DECAY_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSARRAYTOPOINTERDECAYCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h
index 0755da7ce440..2462e7a42466 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccess.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_AVOID_UNCHECKED_CONTAINER_ACCESS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_AVOID_UNCHECKED_CONTAINER_ACCESS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSAVOIDUNCHECKEDCONTAINERACCESS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSAVOIDUNCHECKEDCONTAINERACCESS_H
#include "../ClangTidyCheck.h"
@@ -53,4 +53,4 @@ struct OptionEnumMapping<
getEnumMapping();
};
} // namespace clang::tidy
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_AVOID_UNCHECKED_CONTAINER_ACCESS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSAVOIDUNCHECKEDCONTAINERACCESS_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h
index 73f185529e1e..7c8fec0d60c5 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_CONSTANT_ARRAY_INDEX_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_CONSTANT_ARRAY_INDEX_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSCONSTANTARRAYINDEXCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSCONSTANTARRAYINDEXCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -37,4 +37,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_CONSTANT_ARRAY_INDEX_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSCONSTANTARRAYINDEXCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h
index 45b798527ed4..4f6b17f15c9f 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_POINTER_ARITHMETIC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_POINTER_ARITHMETIC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSPOINTERARITHMETICCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSPOINTERARITHMETICCHECK_H
#include "../ClangTidyCheck.h"
@@ -35,4 +35,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_BOUNDS_POINTER_ARITHMETIC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROBOUNDSPOINTERARITHMETICCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h
index 0b8cfc830854..a0a368cbc6a1 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CONST_CAST_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CONST_CAST_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECONSTCASTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECONSTCASTCHECK_H
#include "../ClangTidyCheck.h"
@@ -36,4 +36,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CONST_CAST_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECONSTCASTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h
index f8e1d5a893da..5fd0208ea991 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CSTYLE_CAST_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CSTYLE_CAST_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECSTYLECASTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECSTYLECASTCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_CSTYLE_CAST_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPECSTYLECASTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
index 8beaab394f04..89d3074fb0a9 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_MEMBER_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_MEMBER_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEMEMBERINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEMEMBERINITCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseSet.h"
@@ -79,4 +79,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_MEMBER_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEMEMBERINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h
index 4948d0ac2d78..566944dfda60 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_REINTERPRETCAST_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_REINTERPRETCAST_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEREINTERPRETCASTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEREINTERPRETCASTCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_REINTERPRETCAST_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEREINTERPRETCASTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h
index 3d01fb9e5280..02d54a5e25c2 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_STATIC_CAST_DOWNCAST_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_STATIC_CAST_DOWNCAST_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPESTATICCASTDOWNCASTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPESTATICCASTDOWNCASTCHECK_H
#include "../ClangTidyCheck.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_STATIC_CAST_DOWNCAST_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPESTATICCASTDOWNCASTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h
index fe82ce963058..41154e8eedce 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_UNION_ACCESS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_UNION_ACCESS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEUNIONACCESSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEUNIONACCESSCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_UNION_ACCESS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEUNIONACCESSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h
index b28d3657703b..5be6163f3b45 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_VARARG_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_VARARG_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEVARARGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEVARARGCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ public:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_VARARG_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PROTYPEVARARGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
index 61990e6b493d..520a763f5abf 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICING_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICING_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICINGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICINGCHECK_H
#include "../ClangTidyCheck.h"
@@ -36,4 +36,4 @@ private:
} // namespace clang::tidy::cppcoreguidelines
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICING_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SLICINGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
index 507aaa1cb9d7..3e83f0b19c51 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIAL_MEMBER_FUNCTIONS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIAL_MEMBER_FUNCTIONS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIALMEMBERFUNCTIONSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIALMEMBERFUNCTIONSCHECK_H
#include "../ClangTidyCheck.h"
@@ -112,4 +112,4 @@ struct DenseMapInfo<
} // namespace llvm
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIAL_MEMBER_FUNCTIONS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_SPECIALMEMBERFUNCTIONSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h
index 51bb15325c95..ee08b76b740c 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_CALLS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_CALLS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSCALLSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSCALLSCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_CALLS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSCALLSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h
index 1b0e3dd0a16f..aa991f8a6adf 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_DECLARATIONS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_DECLARATIONS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSDECLARATIONSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSDECLARATIONSCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULT_ARGUMENTS_DECLARATIONS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_DEFAULTARGUMENTSDECLARATIONSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
index 66be18267ab8..2e268432c17c 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLE_INHERITANCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLE_INHERITANCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLEINHERITANCECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLEINHERITANCECHECK_H
#include "../ClangTidyCheck.h"
@@ -43,4 +43,4 @@ private:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLE_INHERITANCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_MULTIPLEINHERITANCECHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h
index d91ecf8e468d..4945ad213037 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADED_OPERATOR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADED_OPERATOR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADEDOPERATORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADEDOPERATORCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADED_OPERATOR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_OVERLOADEDOPERATORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h
index 42d643e62f28..d2403b04a206 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLY_CONSTRUCTED_OBJECTS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLY_CONSTRUCTED_OBJECTS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLYCONSTRUCTEDOBJECTSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLYCONSTRUCTEDOBJECTSCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLY_CONSTRUCTED_OBJECTS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_STATICALLYCONSTRUCTEDOBJECTSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h
index c644e875b3a3..ba1dbeb27ca5 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILING_RETURN_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILING_RETURN_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILINGRETURNCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILINGRETURNCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILING_RETURN_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_TRAILINGRETURNCHECK_H
diff --git a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h
index 45c6019f3abe..e940602e144d 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/VirtualInheritanceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUAL_INHERITANCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUAL_INHERITANCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUALINHERITANCECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUALINHERITANCECHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::fuchsia
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUAL_INHERITANCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_FUCHSIA_VIRTUALINHERITANCECHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h
index 26a0465bc197..417bb8ffad18 100644
--- a/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h
+++ b/clang-tools-extra/clang-tidy/google/AvoidThrowingObjCExceptionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_AVOID_THROWING_EXCEPTION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_AVOID_THROWING_EXCEPTION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDTHROWINGOBJCEXCEPTIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDTHROWINGOBJCEXCEPTIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::google::objc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_AVOID_THROWING_EXCEPTION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDTHROWINGOBJCEXCEPTIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h
index 1d1e4e31f0c6..0f397b46122d 100644
--- a/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h
+++ b/clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULT_ARGUMENTS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULT_ARGUMENTS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULTARGUMENTSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULTARGUMENTSCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::google
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULT_ARGUMENTS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_DEFAULTARGUMENTSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h
index e4efadfd217a..6acc184f9f05 100644
--- a/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h
+++ b/clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_FUNCTION_NAMING_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_FUNCTION_NAMING_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FUNCTIONNAMINGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FUNCTIONNAMINGCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/StringRef.h"
@@ -36,4 +36,4 @@ public:
} // namespace clang::tidy::google::objc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_FUNCTION_NAMING_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FUNCTIONNAMINGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h
index 9b55855b1fc8..e0693d4bb38d 100644
--- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h
+++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_GLOBAL_VARIABLE_DECLARATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_GLOBAL_VARIABLE_DECLARATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_GLOBALVARIABLEDECLARATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_GLOBALVARIABLEDECLARATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::google::objc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_OBJC_GLOBAL_VARIABLE_DECLARATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_GLOBALVARIABLEDECLARATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h
index 12fe7f7eb340..800e7ac9663d 100644
--- a/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h
+++ b/clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTION_BASECLASS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTION_BASECLASS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTIONBASECLASSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTIONBASECLASSCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::hicpp
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTION_BASECLASS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_EXCEPTIONBASECLASSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h
index 902be2d9d324..e22e31ac7b05 100644
--- a/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h
+++ b/clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAY_PATHS_COVERED_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAY_PATHS_COVERED_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAYPATHSCOVEREDCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAYPATHSCOVEREDCHECK_H
#include "../ClangTidyCheck.h"
@@ -41,4 +41,4 @@ private:
} // namespace clang::tidy::hicpp
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAY_PATHS_COVERED_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_MULTIWAYPATHSCOVEREDCHECK_H
diff --git a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h
index 1ff40eae4622..15d646fd97af 100644
--- a/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h
+++ b/clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NO_ASSEMBLER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NO_ASSEMBLER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
#include "../ClangTidyCheck.h"
@@ -27,4 +27,4 @@ public:
} // namespace clang::tidy::hicpp
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NO_ASSEMBLER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_NOASSEMBLERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h
index 499a4e7bebc1..ef92a4d13f43 100644
--- a/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h
+++ b/clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNED_BITWISE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNED_BITWISE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNEDBITWISECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNEDBITWISECHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ private:
} // namespace clang::tidy::hicpp
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNED_BITWISE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_HICPP_SIGNEDBITWISECHECK_H
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h b/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h
index 50669dc07329..8b8b719df62f 100644
--- a/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h
+++ b/clang-tools-extra/clang-tidy/llvmlibc/NamespaceConstants.h
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_NAMESPACECONSTANTS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_NAMESPACECONSTANTS_H
+
#include "llvm/ADT/StringRef.h"
namespace clang::tidy::llvm_libc {
@@ -18,3 +21,5 @@ const static llvm::StringRef RequiredNamespaceDeclMacroName =
"LIBC_NAMESPACE_DECL";
} // namespace clang::tidy::llvm_libc
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_NAMESPACECONSTANTS_H
diff --git a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
index b341d03083c9..5b98d48780eb 100644
--- a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLEIDENTIFIERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLEIDENTIFIERCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseMap.h"
@@ -41,4 +41,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLEIDENTIFIERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
index e100509ea261..768b62ef07f9 100644
--- a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
+++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINEHOSTILERAIICHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINEHOSTILERAIICHECK_H
#include "../ClangTidyCheck.h"
#include "clang/AST/ASTTypeTraits.h"
@@ -50,4 +50,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINESHOSTILERAIICHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_COROUTINEHOSTILERAIICHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h
index 0c162cc53ff5..e52fa20460c9 100644
--- a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONS_IN_HEADERS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONS_IN_HEADERS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONSINHEADERSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONSINHEADERSCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/FileExtensionsUtils.h"
@@ -38,4 +38,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONS_IN_HEADERS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_DEFINITIONSINHEADERSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
index 43e1ed894a16..619d8191ab41 100644
--- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANERCHECK_H
#include "../ClangTidyCheck.h"
#include "../ClangTidyDiagnosticConsumer.h"
@@ -57,4 +57,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_INCLUDECLEANERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
index ba895b95b9a2..50e70cb8b5b1 100644
--- a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
+++ b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONAL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONAL_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONAL_H
diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h
index 06b83d567a9d..5c184f886887 100644
--- a/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h
+++ b/clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIERCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIERCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIER_H
#include "../ClangTidyCheck.h"
@@ -24,4 +24,4 @@ public:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIERCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGIDENTIFIER_H
diff --git a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h
index 2b8a05d003fa..5f5a4cfdc675 100644
--- a/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACED_CONST_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACED_CONST_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACEDCONSTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACEDCONSTCHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACED_CONST_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISPLACEDCONSTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h
index 93c39fc7005c..9c7aff082f8c 100644
--- a/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADSCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,4 +33,4 @@ public:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_NEWDELETEOVERLOADSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h
index 57289c39df22..f1270076ced1 100644
--- a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANT_EXPRESSION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANT_EXPRESSION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANTEXPRESSIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANTEXPRESSIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANT_EXPRESSION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_REDUNDANTEXPRESSIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
index 15c17e7fa8f6..56e4c12e97ed 100644
--- a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROW_BY_VALUE_CATCH_BY_REFERENCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROW_BY_VALUE_CATCH_BY_REFERENCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROWBYVALUECATCHBYREFERENCECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROWBYVALUECATCHBYREFERENCECHECK_H
#include "../ClangTidyCheck.h"
@@ -49,4 +49,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROW_BY_VALUE_CATCH_BY_REFERENCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_THROWBYVALUECATCHBYREFERENCECHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h
index be9e7b971256..941fe7208814 100644
--- a/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_ASSIGNOPERATORSIGNATURECHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_ASSIGNOPERATORSIGNATURECHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNCONVENTIONALASSIGNOPERATORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNCONVENTIONALASSIGNOPERATORCHECK_H
#include "../ClangTidyCheck.h"
@@ -37,4 +37,4 @@ public:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_ASSIGNOPERATORSIGNATURECHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNCONVENTIONALASSIGNOPERATORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h
index ffe82ca989d1..b9d85c139fc4 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_ALIAS_DECLS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_ALIAS_DECLS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDALIASDECLSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDALIASDECLSCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseMap.h"
@@ -32,4 +32,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_ALIAS_DECLS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDALIASDECLSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h
index 877fc4d6503d..fe2cc6e46c34 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_PARAMETERS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_PARAMETERS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDPARAMETERSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDPARAMETERSCHECK_H
#include "../ClangTidyCheck.h"
@@ -36,4 +36,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_PARAMETERS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDPARAMETERSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h
index 96d8d9da3ceb..986bf37e259e 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_USING_DECLS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_USING_DECLS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDUSINGDECLSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDUSINGDECLSCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/FileExtensionsUtils.h"
@@ -56,4 +56,4 @@ private:
} // namespace clang::tidy::misc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_USING_DECLS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSEDUSINGDECLSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h
index 94838cb1b5a7..22e629f3826c 100644
--- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOID_BIND_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOID_BIND_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOIDBINDCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOIDBINDCHECK_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ private:
};
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOID_BIND_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_AVOIDBINDCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h
index badb2b41f164..015404ee9503 100644
--- a/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_C_HEADERS_TO_CXX_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_C_HEADERS_TO_CXX_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_DEPRECATEDHEADERSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_DEPRECATEDHEADERSCHECK_H
#include "../ClangTidyCheck.h"
@@ -57,4 +57,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_C_HEADERS_TO_CXX_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_DEPRECATEDHEADERSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
index d495087f4949..ce0d7c04107a 100644
--- a/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
+++ b/clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_INTEGRAL_LITERAL_EXPRESSION_MATCHER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_INTEGRAL_LITERAL_EXPRESSION_MATCHER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_INTEGRALLITERALEXPRESSIONMATCHER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_INTEGRALLITERALEXPRESSIONMATCHER_H
#include <clang/Lex/Token.h>
#include <llvm/ADT/ArrayRef.h>
@@ -73,4 +73,4 @@ private:
} // namespace clang::tidy::modernize
-#endif
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_INTEGRALLITERALEXPRESSIONMATCHER_H
diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h
index 55487828ca69..958b4eb4ea2a 100644
--- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -85,4 +85,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h
index 0a0db5e6c633..5d0800d8e788 100644
--- a/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h
+++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTUTILS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTUTILS_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
@@ -466,4 +466,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOPCONVERTUTILS_H
diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h
index 063b35fc46d4..4b7f6250a319 100644
--- a/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SHARED_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SHARED_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESHAREDCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESHAREDCHECK_H
#include "MakeSmartPtrCheck.h"
@@ -35,4 +35,4 @@ protected:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SHARED_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESHAREDCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h
index 28d5b459dd91..1d70f62d4be4 100644
--- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SMART_PTR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SMART_PTR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESMARTPTRCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESMARTPTRCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -64,4 +64,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_SMART_PTR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKESMARTPTRCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h
index 9c4f6bc74639..170343b9fca2 100644
--- a/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_UNIQUE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_UNIQUE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKEUNIQUECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKEUNIQUECHECK_H
#include "MakeSmartPtrCheck.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKE_UNIQUE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MAKEUNIQUECHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h
index f27871c1a98b..eb51f4a4c46a 100644
--- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASS_BY_VALUE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASS_BY_VALUE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASSBYVALUECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASSBYVALUECHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -33,4 +33,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASS_BY_VALUE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_PASSBYVALUECHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h
index 8ce6ec0bef63..5be38dd9dc5b 100644
--- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAWSTRINGLITERALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAWSTRINGLITERALCHECK_H
#include "../ClangTidyCheck.h"
#include <bitset>
@@ -40,4 +40,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAWSTRINGLITERALCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h
index 53de74b68ff2..d6edd9950dda 100644
--- a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANT_VOID_ARG_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANT_VOID_ARG_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANTVOIDARGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANTVOIDARGCHECK_H
#include "../ClangTidyCheck.h"
#include "clang/Lex/Token.h"
@@ -73,4 +73,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANT_VOID_ARG_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REDUNDANTVOIDARGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
index 9a6e2bb0e074..18f4740567d5 100644
--- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_AUTO_PTR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_AUTO_PTR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEAUTOPTRCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEAUTOPTRCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -56,4 +56,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_AUTO_PTR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACEAUTOPTRCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h
index 5f2be10ca66b..3ffa3878bc42 100644
--- a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_RANDOM_SHUFFLE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_RANDOM_SHUFFLE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACERANDOMSHUFFLECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACERANDOMSHUFFLECHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACE_RANDOM_SHUFFLE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_REPLACERANDOMSHUFFLECHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h
index ef465ea5e189..be785716611a 100644
--- a/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURN_BRACED_INIT_LIST_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURN_BRACED_INIT_LIST_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURNBRACEDINITLISTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURNBRACEDINITLISTCHECK_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ public:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURN_BRACED_INIT_LIST_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RETURNBRACEDINITLISTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h
index 95611c9b13e7..ebe77b986d8a 100644
--- a/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARY_STATIC_ASSERT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARY_STATIC_ASSERT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARYSTATICASSERTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARYSTATICASSERTCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ public:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARY_STATIC_ASSERT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_UNARYSTATICASSERTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h
index dc39077d5ac9..85e87fe918e3 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_AUTO_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_AUTO_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEAUTOCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEAUTOCHECK_H
#include "../ClangTidyCheck.h"
@@ -35,4 +35,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_AUTO_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEAUTOCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h
index 64aff84b1be6..95bce0791258 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_BOOL_LITERALS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_BOOL_LITERALS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEBOOLLITERALSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEBOOLLITERALSCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_BOOL_LITERALS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEBOOLLITERALSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h
index be6a18ad66d9..f37921430871 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_DEFAULT_MEMBER_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_DEFAULT_MEMBER_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEDEFAULTMEMBERINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEDEFAULTMEMBERINITCHECK_H
#include "../ClangTidyCheck.h"
@@ -44,4 +44,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_DEFAULT_MEMBER_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEDEFAULTMEMBERINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h
index 87ebf6ff98c2..a7ad5bb166b6 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EMPLACE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EMPLACE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEMPLACECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEMPLACECHECK_H
#include "../ClangTidyCheck.h"
#include <string>
@@ -45,4 +45,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EMPLACE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEMPLACECHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h
index 519f1899170c..a17d3d894e3c 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DEFAULT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DEFAULT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDEFAULTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDEFAULTCHECK_H
#include "../ClangTidyCheck.h"
@@ -48,4 +48,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DEFAULT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDEFAULTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h
index 31a956bc49c5..17155febbd37 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DELETE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DELETE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDELETECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDELETECHECK_H
#include "../ClangTidyCheck.h"
@@ -38,4 +38,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_EQUALS_DELETE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEEQUALSDELETECHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h
index 30b5d4ecd1cf..a97b39bf54c4 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NOEXCEPT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NOEXCEPT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENOEXCEPTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENOEXCEPTCHECK_H
#include "../ClangTidyCheck.h"
@@ -44,4 +44,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NOEXCEPT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENOEXCEPTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
index 7c7b5ae02f1c..1caa07afe352 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NULLPTR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NULLPTR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENULLPTRCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENULLPTRCHECK_H
#include "../ClangTidyCheck.h"
@@ -31,4 +31,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_NULLPTR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USENULLPTRCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h
index f5b3f719c56c..18cff9aa962b 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USESTDPRINTCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USESTDPRINTCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USESTDPRINTCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USESTDPRINTCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -48,4 +48,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USESTDPRINTCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USESTDPRINTCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h
index 0af729b54cfc..936eaf1b2306 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_TRANSPARENT_FUNCTORS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_TRANSPARENT_FUNCTORS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USETRANSPARENTFUNCTORSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USETRANSPARENTFUNCTORSCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ private:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_TRANSPARENT_FUNCTORS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USETRANSPARENTFUNCTORSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h
index 772133d492a9..09b0ba517caa 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseUncaughtExceptionsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_UNCAUGHT_EXCEPTIONS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_UNCAUGHT_EXCEPTIONS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUNCAUGHTEXCEPTIONSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUNCAUGHTEXCEPTIONSCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ public:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_UNCAUGHT_EXCEPTIONS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUNCAUGHTEXCEPTIONSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h
index 5ecabc7a17a4..60813cd04c66 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_USING_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_USING_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUSINGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUSINGCHECK_H
#include "../ClangTidyCheck.h"
@@ -40,4 +40,4 @@ public:
} // namespace clang::tidy::modernize
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USE_USING_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEUSINGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h
index 07ee68a55b6b..a44ef31a683a 100644
--- a/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h
+++ b/clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFER_DEREF_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFER_DEREF_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFERDEREFCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFERDEREFCHECK_H
#include "../ClangTidyCheck.h"
#include "clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h"
@@ -48,4 +48,4 @@ private:
} // namespace clang::tidy::mpi
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFER_DEREF_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_BUFFERDEREFCHECK_H
diff --git a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h
index 5a7db1781996..043f99ee84bd 100644
--- a/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h
+++ b/clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPE_MISMATCH_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPE_MISMATCH_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPEMISMATCHCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPEMISMATCHCHECK_H
#include "../ClangTidyCheck.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -49,4 +49,4 @@ private:
} // namespace clang::tidy::mpi
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPE_MISMATCH_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MPI_TYPEMISMATCHCHECK_H
diff --git a/clang-tools-extra/clang-tidy/objc/AssertEquals.h b/clang-tools-extra/clang-tidy/objc/AssertEquals.h
index 8c21f9bd3a75..140506560d9c 100644
--- a/clang-tools-extra/clang-tidy/objc/AssertEquals.h
+++ b/clang-tools-extra/clang-tidy/objc/AssertEquals.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THIRD_PARTY_LLVM_LLVM_PROJECT_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_OBJCASSERTEQUALS_H_
-#define THIRD_PARTY_LLVM_LLVM_PROJECT_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_OBJCASSERTEQUALS_H_
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_ASSERTEQUALS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_ASSERTEQUALS_H
#include "../ClangTidyCheck.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::objc
-#endif // THIRD_PARTY_LLVM_LLVM_PROJECT_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_OBJCASSERTEQUALS_H_
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_ASSERTEQUALS_H
diff --git a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h
index 2d238690d627..6e0a12a00dd5 100644
--- a/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h
+++ b/clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDEN_SUBCLASSING_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDEN_SUBCLASSING_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDENSUBCLASSINGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDENSUBCLASSINGCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/StringRef.h"
@@ -36,4 +36,4 @@ private:
} // namespace clang::tidy::objc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDEN_SUBCLASSING_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_FORBIDDENSUBCLASSINGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h
index daaebb11673a..1e185b910cd0 100644
--- a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h
+++ b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTY_DECLARATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTY_DECLARATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTYDECLARATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTYDECLARATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ public:
} // namespace clang::tidy::objc
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTY_DECLARATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_OBJC_PROPERTYDECLARATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h
index 2452d2e66ecd..74067c1f5792 100644
--- a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTER_STRING_FIND_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTER_STRING_FIND_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTERSTRINGFINDCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTERSTRINGFINDCHECK_H
#include "../ClangTidyCheck.h"
@@ -38,4 +38,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTER_STRING_FIND_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_FASTERSTRINGFINDCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
index 786081a35107..4690caa8b523 100644
--- a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICIT_CONVERSION_IN_LOOP_CHECK_H_
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICIT_CONVERSION_IN_LOOP_CHECK_H_
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICITCONVERSIONINLOOPCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICITCONVERSIONINLOOPCHECK_H
#include "../ClangTidyCheck.h"
@@ -33,4 +33,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICIT_CONVERSION_IN_LOOP_CHECK_H_
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_IMPLICITCONVERSIONINLOOPCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h
index b82a838e737d..0fae10f2a5f1 100644
--- a/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -35,4 +35,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTSTRINGCONCATENATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h
index 18f7c1937edf..5f3b88f51d62 100644
--- a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTVECTOROPERATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTVECTOROPERATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -44,4 +44,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENT_VECTOR_OPERATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_INEFFICIENTVECTOROPERATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
index 9f67f6485716..ff1d67ba77a9 100644
--- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MOVECONSTANTARGUMENTCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MOVECONSTANTARGUMENTCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_MOVECONSTARGCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_MOVECONSTARGCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseSet.h"
@@ -43,4 +43,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MOVECONSTANTARGUMENTCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_MOVECONSTARGCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h
index 56a1e4af010a..6ed30255e0f9 100644
--- a/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/NoexceptFunctionBaseCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONBASECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONBASECHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/ExceptionSpecAnalyzer.h"
@@ -46,4 +46,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_NOEXCEPTFUNCTIONBASECHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h
index cf74f8000627..21a7f4d040cd 100644
--- a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPE_PROMOTION_IN_MATH_FN_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPE_PROMOTION_IN_MATH_FN_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPEPROMOTIONINMATHFNCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPEPROMOTIONINMATHFNCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -39,4 +39,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPE_PROMOTION_IN_MATH_FN_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_TYPEPROMOTIONINMATHFNCHECK_H
diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h
index 66231889b801..5edc744003c3 100644
--- a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h
+++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_COPY_INITIALIZATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_COPY_INITIALIZATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYCOPYINITIALIZATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYCOPYINITIALIZATION_H
#include "../ClangTidyCheck.h"
#include "clang/AST/Decl.h"
@@ -64,4 +64,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_COPY_INITIALIZATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYCOPYINITIALIZATION_H
diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h
index d59fb4105381..22df689298fb 100644
--- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_VALUE_PARAM_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_VALUE_PARAM_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYVALUEPARAMCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYVALUEPARAMCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -51,4 +51,4 @@ private:
} // namespace clang::tidy::performance
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARY_VALUE_PARAM_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PERFORMANCE_UNNECESSARYVALUEPARAMCHECK_H
diff --git a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h
index e37f89336bc9..0dd640224f01 100644
--- a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h
+++ b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTINCLUDESSCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTINCLUDESSCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTSYSTEMINCLUDESCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTSYSTEMINCLUDESCHECK_H
#include "../ClangTidyCheck.h"
#include "../GlobList.h"
@@ -79,4 +79,4 @@ private:
} // namespace clang::tidy::portability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTINCLUDESSCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_RESTRICTSYSTEMINCLUDESCHECK_H
diff --git a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h
index db2d2307b194..addcecbcb937 100644
--- a/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h
+++ b/clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_SIMD_INTRINSICS_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_SIMD_INTRINSICS_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_SIMDINTRINSICSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_SIMDINTRINSICSCHECK_H
#include "../ClangTidyCheck.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::portability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_SIMD_INTRINSICS_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_PORTABILITY_SIMDINTRINSICSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h
index 1dd28fde217e..64905085d7de 100644
--- a/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h
+++ b/clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_CONST_PARAMS_IN_DECLS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_CONST_PARAMS_IN_DECLS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDCONSTPARAMSINDECLS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDCONSTPARAMSINDECLS_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_CONST_PARAMS_IN_DECLS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDCONSTPARAMSINDECLS_H
diff --git a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h
index 260c84304e13..0e729ecb0134 100644
--- a/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/AvoidNestedConditionalOperatorCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_NESTED_CONDITIONAL_OPERATOR_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_NESTED_CONDITIONAL_OPERATOR_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDNESTEDCONDITIONALOPERATORCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDNESTEDCONDITIONALOPERATORCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOID_NESTED_CONDITIONAL_OPERATOR_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_AVOIDNESTEDCONDITIONALOPERATORCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
index 2aab03f1a896..20cb7f212d11 100644
--- a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
+++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMBERFUNCTIONSTOSTATIC_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMBERFUNCTIONSTOSTATIC_H
#include "../ClangTidyCheck.h"
@@ -32,4 +32,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMBERFUNCTIONSTOSTATIC_H
diff --git a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h
index 52b1b2625e40..b346f6856277 100644
--- a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETE_NULL_POINTER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETE_NULL_POINTER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETENULLPOINTERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETENULLPOINTERCHECK_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETE_NULL_POINTER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DELETENULLPOINTERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
index 297999cf4f92..ca3679108e60 100644
--- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATEINCLUDECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATEINCLUDECHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATEINCLUDECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h
index f88ceb1dd5a0..101089ccfb2e 100644
--- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICIT_BOOL_CONVERSION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICIT_BOOL_CONVERSION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICITBOOLCONVERSIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICITBOOLCONVERSIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -41,4 +41,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICIT_BOOL_CONVERSION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IMPLICITBOOLCONVERSIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h
index 289e131d0d97..32218e1ffc1c 100644
--- a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENT_DECLARATION_PARAMETER_NAME_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENT_DECLARATION_PARAMETER_NAME_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENTDECLARATIONPARAMETERNAMECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENTDECLARATIONPARAMETERNAMECHECK_H
#include "../ClangTidyCheck.h"
@@ -45,4 +45,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENT_DECLARATION_PARAMETER_NAME_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_INCONSISTENTDECLARATIONPARAMETERNAMECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h
index 750b4d887de5..0bf22e5c518f 100644
--- a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLARATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLARATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ISOLATEDECLARATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
index 8347f1a3611d..edd2b1a1ff73 100644
--- a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADING_INDENTATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADING_INDENTATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADINGINDENTATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADINGINDENTATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -38,4 +38,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADING_INDENTATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISLEADINGINDENTATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h
index 0a6e0c8fb25a..f0c565b1d737 100644
--- a/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACED_ARRAY_INDEX_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACED_ARRAY_INDEX_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACEDARRAYINDEXCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACEDARRAYINDEXCHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACED_ARRAY_INDEX_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MISPLACEDARRAYINDEXCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h
index b0156183c0b8..7dcb16e4253b 100644
--- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NON_CONST_PARAMETER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NON_CONST_PARAMETER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NONCONSTPARAMETERCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NONCONSTPARAMETERCHECK_H
#include "../ClangTidyCheck.h"
@@ -59,4 +59,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NON_CONST_PARAMETER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_NONCONSTPARAMETERCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h
index 3018b1f8d14e..fde305039d4c 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_CONTROL_FLOW_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_CONTROL_FLOW_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTCONTROLFLOWCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTCONTROLFLOWCHECK_H
#include "../ClangTidyCheck.h"
@@ -47,4 +47,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_CONTROL_FLOW_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTCONTROLFLOWCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h
index b22cef9a2b77..9b1b09f914a0 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_DECLARATION_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_DECLARATION_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTDECLARATIONCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTDECLARATIONCHECK_H
#include "../ClangTidyCheck.h"
@@ -30,4 +30,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_DECLARATION_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTDECLARATIONCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h
index 5c82a5e02645..49cbf69c06f3 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/RedundantFunctionPtrDereferenceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_FUNCTION_PTR_DEREFERENCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_FUNCTION_PTR_DEREFERENCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTFUNCTIONPTRDEREFERENCECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTFUNCTIONPTRDEREFERENCECHECK_H
#include "../ClangTidyCheck.h"
@@ -28,4 +28,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_FUNCTION_PTR_DEREFERENCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTFUNCTIONPTRDEREFERENCECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h
index 64d365d1e3f4..ff8b02d141a4 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_MEMBER_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_MEMBER_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTMEMBERINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTMEMBERINITCHECK_H
#include "../ClangTidyCheck.h"
@@ -40,4 +40,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_MEMBER_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTMEMBERINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h
index 853ea2fcd031..5c4b744c6445 100644
--- a/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_STRING_INIT_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_STRING_INIT_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTSTRINGINITCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTSTRINGINITCHECK_H
#include "../ClangTidyCheck.h"
#include <string>
@@ -32,4 +32,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANT_STRING_INIT_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_REDUNDANTSTRINGINITCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h
index 466bc411bf80..99520d76c6c6 100644
--- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFY_BOOLEAN_EXPR_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFY_BOOLEAN_EXPR_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFYBOOLEANEXPRCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFYBOOLEANEXPRCHECK_H
#include "../ClangTidyCheck.h"
@@ -75,4 +75,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFY_BOOLEAN_EXPR_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_SIMPLIFYBOOLEANEXPRCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h
index c376806d0009..38a2ea641975 100644
--- a/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_ACCESSED_THROUGH_INSTANCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_ACCESSED_THROUGH_INSTANCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICACCESSEDTHROUGHINSTANCECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICACCESSEDTHROUGHINSTANCECHECK_H
#include "../ClangTidyCheck.h"
@@ -41,4 +41,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_ACCESSED_THROUGH_INSTANCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICACCESSEDTHROUGHINSTANCECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h
index 55306556fb0a..e096682ad031 100644
--- a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_DEFINITION_IN_ANONYMOUS_NAMESPACE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_DEFINITION_IN_ANONYMOUS_NAMESPACE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICDEFINITIONINANONYMOUSNAMESPACECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICDEFINITIONINANONYMOUSNAMESPACECHECK_H
#include "../ClangTidyCheck.h"
@@ -34,4 +34,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATIC_DEFINITION_IN_ANONYMOUS_NAMESPACE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STATICDEFINITIONINANONYMOUSNAMESPACECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h
index ab6449e3fd41..28742087bad5 100644
--- a/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTR_DELETE_RELEASE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTR_DELETE_RELEASE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTRDELETERELEASECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTRDELETERELEASECHECK_H
#include "../ClangTidyCheck.h"
@@ -37,4 +37,4 @@ private:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTR_DELETE_RELEASE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_UNIQUEPTRDELETERELEASECHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h
index f431311b4282..32983e48450f 100644
--- a/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEALGORITHMCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEALGORITHMCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEANYOFALLOFCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEANYOFALLOFCHECK_H
#include "../ClangTidyCheck.h"
#include "../utils/IncludeInserter.h"
@@ -33,4 +33,4 @@ public:
} // namespace clang::tidy::readability
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEALGORITHMCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_USEANYOFALLOFCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp
index 40aaff4cb389..ef495d3bf0f6 100644
--- a/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/UseConcisePreprocessorDirectivesCheck.cpp
@@ -91,7 +91,10 @@ private:
Check.diag(
DirectiveLoc,
"preprocessor condition can be written more concisely using '#%0'")
- << FixItHint::CreateReplacement(DirectiveLoc, Replacements[Inverted])
+ << FixItHint::CreateReplacement(
+ CharSourceRange::getCharRange(DirectiveLoc,
+ ConditionRange.getBegin()),
+ (Replacements[Inverted].str() + " "))
<< FixItHint::CreateReplacement(ConditionRange, Macro)
<< Replacements[Inverted];
}
diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h
index f86828e8c46e..44b7a379e527 100644
--- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h
+++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.h
@@ -14,8 +14,13 @@
///
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TOOL_CLANGTIDYMAIN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TOOL_CLANGTIDYMAIN_H
+
namespace clang::tidy {
int clangTidyMain(int argc, const char **argv);
} // namespace clang::tidy
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TOOL_CLANGTIDYMAIN_H
diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.h b/clang-tools-extra/clang-tidy/utils/ASTUtils.h
index c2127f074698..808cd4a54fd1 100644
--- a/clang-tools-extra/clang-tidy/utils/ASTUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ASTUTILS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ASTUTILS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_ASTUTILS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_ASTUTILS_H
#include "clang/AST/AST.h"
@@ -47,4 +47,4 @@ findOutermostIndirectFieldDeclForField(const FieldDecl *FD);
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ASTUTILS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_ASTUTILS_H
diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h
index 53ce2e0ea859..879c84d108d7 100644
--- a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h
+++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h
@@ -11,6 +11,9 @@
///
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_BRACESAROUNDSTATEMENT_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_BRACESAROUNDSTATEMENT_H
+
#include "clang/AST/Stmt.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceLocation.h"
@@ -73,3 +76,5 @@ getBraceInsertionsHints(const Stmt *S, const LangOptions &LangOpts,
SourceLocation EndLocHint = SourceLocation());
} // namespace clang::tidy::utils
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_BRACESAROUNDSTATEMENT_H
diff --git a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h
index 910960137ddb..1960eabf074c 100644
--- a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h
+++ b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.h
@@ -11,6 +11,9 @@
///
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_DESIGNATEDINITIALIZERS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_DESIGNATEDINITIALIZERS_H
+
#include "clang/AST/Expr.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
@@ -40,3 +43,5 @@ llvm::DenseMap<clang::SourceLocation, std::string>
getUnwrittenDesignators(const clang::InitListExpr *Syn);
} // namespace clang::tidy::utils
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_DESIGNATEDINITIALIZERS_H
diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h
index 1ab6dcb2eb25..1a277c8a6d3b 100644
--- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h
+++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_ANALYZER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_ANALYZER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONANALYZER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONANALYZER_H
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -158,4 +158,4 @@ private:
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_ANALYZER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONANALYZER_H
diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h
index 3fd6fe170c73..06d11c888a0c 100644
--- a/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h
+++ b/clang-tools-extra/clang-tidy/utils/ExceptionSpecAnalyzer.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_SPEC_ANALYZER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_SPEC_ANALYZER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONSPECANALYZER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONSPECANALYZER_H
#include "clang/AST/DeclCXX.h"
#include "llvm/ADT/DenseMap.h"
@@ -86,4 +86,4 @@ private:
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTION_SPEC_ANALYZER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXCEPTIONSPECANALYZER_H
diff --git a/clang-tools-extra/clang-tidy/utils/ExprSequence.h b/clang-tools-extra/clang-tidy/utils/ExprSequence.h
index 9ef94e0e3bcd..2aea99e1440c 100644
--- a/clang-tools-extra/clang-tidy/utils/ExprSequence.h
+++ b/clang-tools-extra/clang-tidy/utils/ExprSequence.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXPRSEQUENCE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXPRSEQUENCE_H
#include "clang/Analysis/CFG.h"
#include "clang/Lex/Lexer.h"
@@ -117,4 +117,4 @@ private:
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_EXPRSEQUENCE_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_EXPRSEQUENCE_H
diff --git a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h
index dfab141e3241..425c29c3b3b1 100644
--- a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILE_EXTENSIONS_UTILS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILE_EXTENSIONS_UTILS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILEEXTENSIONSUTILS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILEEXTENSIONSUTILS_H
#include "../FileExtensionsSet.h"
#include "clang/Basic/SourceLocation.h"
@@ -60,4 +60,4 @@ bool isFileExtension(StringRef FileName,
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILE_EXTENSIONS_UTILS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_FILEEXTENSIONSUTILS_H
diff --git a/clang-tools-extra/clang-tidy/utils/IncludeInserter.h b/clang-tools-extra/clang-tidy/utils/IncludeInserter.h
index f6ca7d63632d..9dbf2a76369c 100644
--- a/clang-tools-extra/clang-tidy/utils/IncludeInserter.h
+++ b/clang-tools-extra/clang-tidy/utils/IncludeInserter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDEINSERTER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDEINSERTER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDEINSERTER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDEINSERTER_H
#include "IncludeSorter.h"
#include "clang/Basic/Diagnostic.h"
@@ -100,4 +100,4 @@ private:
} // namespace tidy::utils
} // namespace clang
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDEINSERTER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDEINSERTER_H
diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h
index 66830ee7f1ef..6efec976847b 100644
--- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h
+++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDESORTER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDESORTER_H
#include "../ClangTidyCheck.h"
#include <optional>
@@ -73,4 +73,4 @@ template <> struct OptionEnumMapping<utils::IncludeSorter::IncludeStyle> {
getEnumMapping();
};
} // namespace clang::tidy
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_INCLUDESORTER_H
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index b76a37874b51..c74d1e02b931 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXER_UTILS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXER_UTILS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXERUTILS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXERUTILS_H
#include "clang/AST/ASTContext.h"
#include "clang/Basic/TokenKinds.h"
@@ -130,4 +130,4 @@ SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
} // namespace tidy::utils::lexer
} // namespace clang
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXER_UTILS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_LEXERUTILS_H
diff --git a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h
index 497b67e82a90..fbf0ade3b0cc 100644
--- a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h
+++ b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NAMESPACEALIASER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NAMESPACEALIASER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_NAMESPACEALIASER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_NAMESPACEALIASER_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/Stmt.h"
@@ -45,4 +45,4 @@ private:
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NAMESPACEALIASER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_NAMESPACEALIASER_H
diff --git a/clang-tools-extra/clang-tidy/utils/OptionsUtils.h b/clang-tools-extra/clang-tidy/utils/OptionsUtils.h
index aec24ab0a84b..3a123484fae6 100644
--- a/clang-tools-extra/clang-tidy/utils/OptionsUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/OptionsUtils.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_OPTIONUTILS_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_OPTIONUTILS_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_OPTIONSUTILS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_OPTIONSUTILS_H
#include "clang/Basic/LLVM.h"
#include <string>
@@ -26,4 +26,4 @@ std::string serializeStringList(ArrayRef<StringRef> Strings);
} // namespace clang::tidy::utils::options
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_OPTIONUTILS_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_OPTIONSUTILS_H
diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h
index b38bc082644c..fe1059d5e5b8 100644
--- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h
+++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_RENAMERCLANGTIDYCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_RENAMERCLANGTIDYCHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_RENAMERCLANGTIDYCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_RENAMERCLANGTIDYCHECK_H
#include "../ClangTidyCheck.h"
#include "llvm/ADT/DenseMap.h"
@@ -167,4 +167,4 @@ private:
} // namespace tidy
} // namespace clang
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_RENAMERCLANGTIDYCHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_RENAMERCLANGTIDYCHECK_H
diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
index ad20fbd47575..e77f84b0cdc1 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TRANSFORMER_CLANG_TIDY_CHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TRANSFORMER_CLANG_TIDY_CHECK_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_TRANSFORMERCLANGTIDYCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_TRANSFORMERCLANGTIDYCHECK_H
#include "../ClangTidyCheck.h"
#include "IncludeInserter.h"
@@ -83,4 +83,4 @@ private:
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TRANSFORMER_CLANG_TIDY_CHECK_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_TRANSFORMERCLANGTIDYCHECK_H
diff --git a/clang-tools-extra/clang-tidy/utils/UsingInserter.h b/clang-tools-extra/clang-tidy/utils/UsingInserter.h
index 23c317581c19..3e943569047a 100644
--- a/clang-tools-extra/clang-tidy/utils/UsingInserter.h
+++ b/clang-tools-extra/clang-tidy/utils/UsingInserter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USINGINSERTER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USINGINSERTER_H
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USINGINSERTER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USINGINSERTER_H
#include "clang/AST/Decl.h"
#include "clang/AST/Stmt.h"
@@ -43,4 +43,4 @@ private:
};
} // namespace clang::tidy::utils
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_USINGINSERTER_H
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USINGINSERTER_H
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 0f765e96fb15..f8e6da73bbb1 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -456,7 +456,6 @@ private:
ClangdLSPServer &Server;
};
-constexpr int ClangdLSPServer::MessageHandler::MaxReplayCallbacks;
// call(), notify(), and reply() wrap the Transport, adding logging and locking.
void ClangdLSPServer::callMethod(StringRef Method, llvm::json::Value Params,
diff --git a/clang-tools-extra/clangd/FileDistance.cpp b/clang-tools-extra/clangd/FileDistance.cpp
index 06c1a8bc92a8..d587c26a8214 100644
--- a/clang-tools-extra/clangd/FileDistance.cpp
+++ b/clang-tools-extra/clangd/FileDistance.cpp
@@ -54,7 +54,6 @@ static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
return Result;
}
-constexpr const unsigned FileDistance::Unreachable;
const llvm::hash_code FileDistance::RootHash =
llvm::hash_value(llvm::StringRef("/"));
diff --git a/clang-tools-extra/clangd/FuzzyMatch.cpp b/clang-tools-extra/clangd/FuzzyMatch.cpp
index de7280d80361..cf5182bc1b2d 100644
--- a/clang-tools-extra/clangd/FuzzyMatch.cpp
+++ b/clang-tools-extra/clangd/FuzzyMatch.cpp
@@ -62,9 +62,6 @@
namespace clang {
namespace clangd {
-constexpr int FuzzyMatcher::MaxPat;
-constexpr int FuzzyMatcher::MaxWord;
-
static char lower(char C) { return C >= 'A' && C <= 'Z' ? C + ('a' - 'A') : C; }
// A "negative infinity" score that won't overflow.
// We use this to mark unreachable states and forbidden solutions.
diff --git a/clang-tools-extra/clangd/index/SymbolLocation.cpp b/clang-tools-extra/clangd/index/SymbolLocation.cpp
index 61da267b93ce..058cb1e0945f 100644
--- a/clang-tools-extra/clangd/index/SymbolLocation.cpp
+++ b/clang-tools-extra/clangd/index/SymbolLocation.cpp
@@ -11,9 +11,6 @@
namespace clang {
namespace clangd {
-constexpr uint32_t SymbolLocation::Position::MaxLine;
-constexpr uint32_t SymbolLocation::Position::MaxColumn;
-
void SymbolLocation::Position::setLine(uint32_t L) {
if (L > MaxLine)
L = MaxLine;
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 8ba1d1a78bf9..92a2d33d8fa1 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -284,6 +284,11 @@ New check aliases
<clang-tidy/checks/bugprone/raw-memory-call-on-non-trivial-type>`
keeping initial check as an alias to the new one.
+- Renamed :doc:`cert-oop58-cpp <clang-tidy/checks/cert/oop58-cpp>` to
+ :doc:`bugprone-copy-constructor-mutates-argument
+ <clang-tidy/checks/bugprone/copy-constructor-mutates-argument>`
+ keeping initial check as an alias to the new one.
+
Changes in existing checks
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -480,6 +485,10 @@ Changes in existing checks
<clang-tidy/checks/readability/uppercase-literal-suffix>` check to recognize
literal suffixes added in C++23 and C23.
+- Improved :doc:`readability-use-concise-preprocessor-directives
+ <clang-tidy/checks/readability/use-concise-preprocessor-directives>` check to
+ generate correct fix-its for forms without a space after the directive.
+
Removed checks
^^^^^^^^^^^^^^
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst
new file mode 100644
index 000000000000..28e5015beeaa
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst
@@ -0,0 +1,11 @@
+.. title:: clang-tidy - bugprone-copy-constructor-mutates-argument
+
+bugprone-copy-constructor-mutates-argument
+==========================================
+
+Finds assignments to the copied object and its direct or indirect members
+in copy constructors and copy assignment operators.
+
+This check corresponds to the CERT C Coding Standard rule
+`OOP58-CPP. Copy operations must not mutate the source object
+<https://wiki.sei.cmu.edu/confluence/display/cplusplus/OOP58-CPP.+Copy+operations+must+not+mutate+the+source+object>`_. \ No newline at end of file
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/oop58-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/oop58-cpp.rst
index 399fb1b7e927..e435490f0711 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cert/oop58-cpp.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cert/oop58-cpp.rst
@@ -1,11 +1,10 @@
-.. title:: clang-tidy - cert-mutating-copy
+.. title:: clang-tidy - cert-oop58-cpp
+.. meta::
+ :http-equiv=refresh: 5;URL=../bugprone/copy-constructor-mutates-argument.html
cert-oop58-cpp
==============
-Finds assignments to the copied object and its direct or indirect members
-in copy constructors and copy assignment operators.
-
-This check corresponds to the CERT C Coding Standard rule
-`OOP58-CPP. Copy operations must not mutate the source object
-<https://wiki.sei.cmu.edu/confluence/display/cplusplus/OOP58-CPP.+Copy+operations+must+not+mutate+the+source+object>`_.
+The `cert-oop58-cpp` check is an alias, please see
+:doc:`bugprone-copy-constructor-mutates-argument <../bugprone/copy-constructor-mutates-argument>`
+for more information. \ No newline at end of file
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index be509abab00a..e2875604af72 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -90,6 +90,7 @@ Clang-Tidy Checks
:doc:`bugprone-command-processor <bugprone/command-processor>`,
:doc:`bugprone-compare-pointer-to-member-virtual-function <bugprone/compare-pointer-to-member-virtual-function>`,
:doc:`bugprone-copy-constructor-init <bugprone/copy-constructor-init>`, "Yes"
+ :doc:`bugprone-copy-constructor-mutates-argument <bugprone/copy-constructor-mutates-argument>`,
:doc:`bugprone-crtp-constructor-accessibility <bugprone/crtp-constructor-accessibility>`, "Yes"
:doc:`bugprone-dangling-handle <bugprone/dangling-handle>`,
:doc:`bugprone-default-operator-new-on-overaligned-type <bugprone/default-operator-new-on-overaligned-type>`,
@@ -463,6 +464,7 @@ Check aliases
:doc:`cert-oop11-cpp <cert/oop11-cpp>`, :doc:`performance-move-constructor-init <performance/move-constructor-init>`,
:doc:`cert-oop54-cpp <cert/oop54-cpp>`, :doc:`bugprone-unhandled-self-assignment <bugprone/unhandled-self-assignment>`,
:doc:`cert-oop57-cpp <cert/oop57-cpp>`, :doc:`bugprone-raw-memory-call-on-non-trivial-type <bugprone/raw-memory-call-on-non-trivial-type>`,
+ :doc:`cert-oop58-cpp <cert/oop58-cpp>`, :doc:`bugprone-copy-constructor-mutates-argument <bugprone/copy-constructor-mutates-argument>`,
:doc:`cert-pos44-c <cert/pos44-c>`, :doc:`bugprone-bad-signal-to-kill-thread <bugprone/bad-signal-to-kill-thread>`,
:doc:`cert-pos47-c <cert/pos47-c>`, :doc:`concurrency-thread-canceltype-asynchronous <concurrency/thread-canceltype-asynchronous>`,
:doc:`cert-sig30-c <cert/sig30-c>`, :doc:`bugprone-signal-handler <bugprone/signal-handler>`,
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cert/oop58-cpp.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/copy-constructor-mutates-argument.cpp
index 223248cb8847..9fdbb7af90f9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/cert/oop58-cpp.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/copy-constructor-mutates-argument.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s cert-oop58-cpp %t
+// RUN: %check_clang_tidy %s bugprone-copy-constructor-mutates-argument %t
// Example test cases from CERT rule
// https://wiki.sei.cmu.edu/confluence/display/cplusplus/OOP58-CPP.+Copy+operations+must+not+mutate+the+source+object
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/use-concise-preprocessor-directives.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/use-concise-preprocessor-directives.cpp
index 53e079bcca40..b8a4953161d8 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/use-concise-preprocessor-directives.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/use-concise-preprocessor-directives.cpp
@@ -30,6 +30,14 @@
// CHECK-MESSAGES: :[[@LINE+2]]:2: warning: preprocessor condition can be written more concisely using '#ifdef' [readability-use-concise-preprocessor-directives]
// CHECK-FIXES: #ifdef FOO
+#if(defined(FOO))
+// CHECK-MESSAGES-23: :[[@LINE+2]]:2: warning: preprocessor condition can be written more concisely using '#elifdef' [readability-use-concise-preprocessor-directives]
+// CHECK-FIXES-23: #elifdef BAR
+#elif(defined(BAR))
+#endif
+
+// CHECK-MESSAGES: :[[@LINE+2]]:2: warning: preprocessor condition can be written more concisely using '#ifdef' [readability-use-concise-preprocessor-directives]
+// CHECK-FIXES: #ifdef FOO
#if (defined FOO)
// CHECK-MESSAGES-23: :[[@LINE+2]]:4: warning: preprocessor condition can be written more concisely using '#elifdef' [readability-use-concise-preprocessor-directives]
// CHECK-FIXES-23: # elifdef BAR
diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst
index 1d16ea9fe563..847d37d12408 100644
--- a/clang/Maintainers.rst
+++ b/clang/Maintainers.rst
@@ -242,7 +242,7 @@ ARM EABI
Compiler-Wide Topics
--------------------
The following people are responsible for functionality that does not fit into
-a single part of the compiler, but instead span multiple components within the
+a single part of the compiler, but instead spans multiple components within the
compiler.
Attributes
diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index c48bc9c2eb7d..d352373e85c6 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -333,18 +333,18 @@ class SourceLocation(Structure):
@property
def is_in_system_header(self):
"""Returns true if the given source location is in a system header."""
- return conf.lib.clang_Location_isInSystemHeader(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_Location_isInSystemHeader(self))
def __eq__(self, other):
- return isinstance(other, SourceLocation) and conf.lib.clang_equalLocations(
- self, other
+ return isinstance(other, SourceLocation) and bool(
+ conf.lib.clang_equalLocations(self, other)
)
def __ne__(self, other):
return not self.__eq__(other)
def __lt__(self, other: SourceLocation) -> bool:
- return conf.lib.clang_isBeforeInTranslationUnit(self, other) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isBeforeInTranslationUnit(self, other))
def __le__(self, other: SourceLocation) -> bool:
return self < other or self == other
@@ -396,8 +396,8 @@ class SourceRange(Structure):
return conf.lib.clang_getRangeEnd(self) # type: ignore [no-any-return]
def __eq__(self, other):
- return isinstance(other, SourceRange) and conf.lib.clang_equalRanges(
- self, other
+ return isinstance(other, SourceRange) and bool(
+ conf.lib.clang_equalRanges(self, other)
)
def __ne__(self, other):
@@ -674,39 +674,39 @@ class CursorKind(BaseEnumeration):
def is_declaration(self):
"""Test if this is a declaration kind."""
- return conf.lib.clang_isDeclaration(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isDeclaration(self))
def is_reference(self):
"""Test if this is a reference kind."""
- return conf.lib.clang_isReference(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isReference(self))
def is_expression(self):
"""Test if this is an expression kind."""
- return conf.lib.clang_isExpression(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isExpression(self))
def is_statement(self):
"""Test if this is a statement kind."""
- return conf.lib.clang_isStatement(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isStatement(self))
def is_attribute(self):
"""Test if this is an attribute kind."""
- return conf.lib.clang_isAttribute(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isAttribute(self))
def is_invalid(self):
"""Test if this is an invalid kind."""
- return conf.lib.clang_isInvalid(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isInvalid(self))
def is_translation_unit(self):
"""Test if this is a translation unit kind."""
- return conf.lib.clang_isTranslationUnit(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isTranslationUnit(self))
def is_preprocessing(self):
"""Test if this is a preprocessing kind."""
- return conf.lib.clang_isPreprocessing(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isPreprocessing(self))
def is_unexposed(self):
"""Test if this is an unexposed kind."""
- return conf.lib.clang_isUnexposed(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isUnexposed(self))
###
# Declaration Kinds
@@ -1650,7 +1650,9 @@ class Cursor(Structure):
# This function is not null-guarded because it is used in cursor_null_guard itself
def __eq__(self, other: object) -> bool:
- return isinstance(other, Cursor) and conf.lib.clang_equalCursors(self, other)
+ return isinstance(other, Cursor) and bool(
+ conf.lib.clang_equalCursors(self, other)
+ )
# Not null-guarded for consistency with __eq__
def __ne__(self, other: object) -> bool:
@@ -1670,48 +1672,48 @@ class Cursor(Structure):
Returns true if the declaration pointed at by the cursor is also a
definition of that entity.
"""
- return conf.lib.clang_isCursorDefinition(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isCursorDefinition(self))
@cursor_null_guard
def is_const_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared 'const'.
"""
- return conf.lib.clang_CXXMethod_isConst(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isConst(self))
@cursor_null_guard
def is_converting_constructor(self) -> bool:
"""Returns True if the cursor refers to a C++ converting constructor."""
- return conf.lib.clang_CXXConstructor_isConvertingConstructor(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXConstructor_isConvertingConstructor(self))
@cursor_null_guard
def is_copy_constructor(self) -> bool:
"""Returns True if the cursor refers to a C++ copy constructor."""
- return conf.lib.clang_CXXConstructor_isCopyConstructor(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXConstructor_isCopyConstructor(self))
@cursor_null_guard
def is_default_constructor(self) -> bool:
"""Returns True if the cursor refers to a C++ default constructor."""
- return conf.lib.clang_CXXConstructor_isDefaultConstructor(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXConstructor_isDefaultConstructor(self))
@cursor_null_guard
def is_move_constructor(self) -> bool:
"""Returns True if the cursor refers to a C++ move constructor."""
- return conf.lib.clang_CXXConstructor_isMoveConstructor(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXConstructor_isMoveConstructor(self))
@cursor_null_guard
def is_default_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared '= default'.
"""
- return conf.lib.clang_CXXMethod_isDefaulted(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isDefaulted(self))
@cursor_null_guard
def is_deleted_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared '= delete'.
"""
- return conf.lib.clang_CXXMethod_isDeleted(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isDeleted(self))
@cursor_null_guard
def is_copy_assignment_operator_method(self) -> bool:
@@ -1737,7 +1739,7 @@ class Cursor(Structure):
Is not.
"""
- return conf.lib.clang_CXXMethod_isCopyAssignmentOperator(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isCopyAssignmentOperator(self))
@cursor_null_guard
def is_move_assignment_operator_method(self) -> bool:
@@ -1763,7 +1765,7 @@ class Cursor(Structure):
Is not.
"""
- return conf.lib.clang_CXXMethod_isMoveAssignmentOperator(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isMoveAssignmentOperator(self))
@cursor_null_guard
def is_explicit_method(self) -> bool:
@@ -1809,47 +1811,47 @@ class Cursor(Structure):
This method will return 0 for the constructor and 1 for
the conversion function.
"""
- return conf.lib.clang_CXXMethod_isExplicit(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isExplicit(self))
@cursor_null_guard
def is_mutable_field(self) -> bool:
"""Returns True if the cursor refers to a C++ field that is declared
'mutable'.
"""
- return conf.lib.clang_CXXField_isMutable(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXField_isMutable(self))
@cursor_null_guard
def is_pure_virtual_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared pure virtual.
"""
- return conf.lib.clang_CXXMethod_isPureVirtual(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isPureVirtual(self))
@cursor_null_guard
def is_static_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared 'static'.
"""
- return conf.lib.clang_CXXMethod_isStatic(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isStatic(self))
@cursor_null_guard
def is_virtual_method(self) -> bool:
"""Returns True if the cursor refers to a C++ member function or member
function template that is declared 'virtual'.
"""
- return conf.lib.clang_CXXMethod_isVirtual(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXMethod_isVirtual(self))
@cursor_null_guard
def is_abstract_record(self) -> bool:
"""Returns True if the cursor refers to a C++ record declaration
that has pure virtual member functions.
"""
- return conf.lib.clang_CXXRecord_isAbstract(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_CXXRecord_isAbstract(self))
@cursor_null_guard
def is_scoped_enum(self) -> bool:
"""Returns True if the cursor refers to a scoped enum declaration."""
- return conf.lib.clang_EnumDecl_isScoped(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_EnumDecl_isScoped(self))
@cursor_null_guard
def get_definition(self) -> Cursor | None:
@@ -2322,7 +2324,7 @@ class Cursor(Structure):
@cursor_null_guard
def is_virtual_base(self) -> bool:
"""Returns whether the CXX_BASE_SPECIFIER pointed by this Cursor is virtual."""
- return conf.lib.clang_isVirtualBase(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isVirtualBase(self))
@cursor_null_guard
def is_anonymous(self) -> bool:
@@ -2335,7 +2337,7 @@ class Cursor(Structure):
"""
if self.kind == CursorKind.FIELD_DECL:
return self.type.get_declaration().is_anonymous()
- return conf.lib.clang_Cursor_isAnonymous(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_Cursor_isAnonymous(self))
@cursor_null_guard
def is_anonymous_record_decl(self) -> bool:
@@ -2346,14 +2348,14 @@ class Cursor(Structure):
"""
if self.kind == CursorKind.FIELD_DECL:
return self.type.get_declaration().is_anonymous_record_decl()
- return conf.lib.clang_Cursor_isAnonymousRecordDecl(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_Cursor_isAnonymousRecordDecl(self))
@cursor_null_guard
def is_bitfield(self) -> bool:
"""
Check if the field is a bitfield.
"""
- return conf.lib.clang_Cursor_isBitField(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_Cursor_isBitField(self))
@cursor_null_guard
def get_bitfield_width(self) -> int:
@@ -2822,7 +2824,7 @@ class Type(Structure):
This does not look through typedefs that may have added "const"
at a different level.
"""
- return conf.lib.clang_isConstQualifiedType(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isConstQualifiedType(self))
def is_volatile_qualified(self) -> bool:
"""Determine whether a Type has the "volatile" qualifier set.
@@ -2830,7 +2832,7 @@ class Type(Structure):
This does not look through typedefs that may have added "volatile"
at a different level.
"""
- return conf.lib.clang_isVolatileQualifiedType(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isVolatileQualifiedType(self))
def is_restrict_qualified(self) -> bool:
"""Determine whether a Type has the "restrict" qualifier set.
@@ -2838,13 +2840,13 @@ class Type(Structure):
This does not look through typedefs that may have added "restrict" at
a different level.
"""
- return conf.lib.clang_isRestrictQualifiedType(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isRestrictQualifiedType(self))
def is_function_variadic(self) -> bool:
"""Determine whether this function Type is a variadic function type."""
assert self.kind == TypeKind.FUNCTIONPROTO
- return conf.lib.clang_isFunctionTypeVariadic(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isFunctionTypeVariadic(self))
def get_address_space(self) -> int:
return conf.lib.clang_getAddressSpace(self) # type: ignore [no-any-return]
@@ -2854,7 +2856,7 @@ class Type(Structure):
def is_pod(self) -> bool:
"""Determine whether this Type represents plain old data (POD)."""
- return conf.lib.clang_isPODType(self) # type: ignore [no-any-return]
+ return bool(conf.lib.clang_isPODType(self))
def get_pointee(self) -> Type:
"""
@@ -2988,7 +2990,7 @@ class Type(Structure):
return _CXString.from_result(conf.lib.clang_getTypePrettyPrinted(self, policy))
def __eq__(self, other: object) -> bool:
- return isinstance(other, Type) and conf.lib.clang_equalTypes(self, other)
+ return isinstance(other, Type) and bool(conf.lib.clang_equalTypes(self, other))
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
@@ -4127,22 +4129,22 @@ FUNCTION_LIST: list[LibFunc] = [
("clang_CXRewriter_removeText", [Rewriter, SourceRange]),
("clang_CXRewriter_replaceText", [Rewriter, SourceRange, c_interop_string]),
("clang_CXRewriter_writeMainFileToStdOut", [Rewriter]),
- ("clang_CXXConstructor_isConvertingConstructor", [Cursor], bool),
- ("clang_CXXConstructor_isCopyConstructor", [Cursor], bool),
- ("clang_CXXConstructor_isDefaultConstructor", [Cursor], bool),
- ("clang_CXXConstructor_isMoveConstructor", [Cursor], bool),
- ("clang_CXXField_isMutable", [Cursor], bool),
- ("clang_CXXMethod_isConst", [Cursor], bool),
- ("clang_CXXMethod_isDefaulted", [Cursor], bool),
- ("clang_CXXMethod_isDeleted", [Cursor], bool),
- ("clang_CXXMethod_isCopyAssignmentOperator", [Cursor], bool),
- ("clang_CXXMethod_isMoveAssignmentOperator", [Cursor], bool),
- ("clang_CXXMethod_isExplicit", [Cursor], bool),
- ("clang_CXXMethod_isPureVirtual", [Cursor], bool),
- ("clang_CXXMethod_isStatic", [Cursor], bool),
- ("clang_CXXMethod_isVirtual", [Cursor], bool),
- ("clang_CXXRecord_isAbstract", [Cursor], bool),
- ("clang_EnumDecl_isScoped", [Cursor], bool),
+ ("clang_CXXConstructor_isConvertingConstructor", [Cursor], c_uint),
+ ("clang_CXXConstructor_isCopyConstructor", [Cursor], c_uint),
+ ("clang_CXXConstructor_isDefaultConstructor", [Cursor], c_uint),
+ ("clang_CXXConstructor_isMoveConstructor", [Cursor], c_uint),
+ ("clang_CXXField_isMutable", [Cursor], c_uint),
+ ("clang_CXXMethod_isConst", [Cursor], c_uint),
+ ("clang_CXXMethod_isDefaulted", [Cursor], c_uint),
+ ("clang_CXXMethod_isDeleted", [Cursor], c_uint),
+ ("clang_CXXMethod_isCopyAssignmentOperator", [Cursor], c_uint),
+ ("clang_CXXMethod_isMoveAssignmentOperator", [Cursor], c_uint),
+ ("clang_CXXMethod_isExplicit", [Cursor], c_uint),
+ ("clang_CXXMethod_isPureVirtual", [Cursor], c_uint),
+ ("clang_CXXMethod_isStatic", [Cursor], c_uint),
+ ("clang_CXXMethod_isVirtual", [Cursor], c_uint),
+ ("clang_CXXRecord_isAbstract", [Cursor], c_uint),
+ ("clang_EnumDecl_isScoped", [Cursor], c_uint),
("clang_defaultDiagnosticDisplayOptions", [], c_uint),
("clang_defaultSaveOptions", [TranslationUnit], c_uint),
("clang_disposeCodeCompleteResults", [CodeCompletionResults]),
@@ -4153,10 +4155,10 @@ FUNCTION_LIST: list[LibFunc] = [
("clang_disposeString", [_CXString]),
("clang_disposeTokens", [TranslationUnit, POINTER(Token), c_uint]),
("clang_disposeTranslationUnit", [TranslationUnit]),
- ("clang_equalCursors", [Cursor, Cursor], bool),
- ("clang_equalLocations", [SourceLocation, SourceLocation], bool),
- ("clang_equalRanges", [SourceRange, SourceRange], bool),
- ("clang_equalTypes", [Type, Type], bool),
+ ("clang_equalCursors", [Cursor, Cursor], c_uint),
+ ("clang_equalLocations", [SourceLocation, SourceLocation], c_uint),
+ ("clang_equalRanges", [SourceRange, SourceRange], c_uint),
+ ("clang_equalTypes", [Type, Type], c_uint),
("clang_formatDiagnostic", [Diagnostic, c_uint], _CXString),
("clang_getAddressSpace", [Type], c_uint),
("clang_getArgType", [Type, c_uint], Type),
@@ -4220,7 +4222,7 @@ FUNCTION_LIST: list[LibFunc] = [
("clang_getFile", [TranslationUnit, c_interop_string], c_object_p),
("clang_getFileName", [File], _CXString),
("clang_getFileTime", [File], c_uint),
- ("clang_File_isEqual", [File, File], bool),
+ ("clang_File_isEqual", [File, File], c_int),
("clang_getIBOutletCollectionType", [Cursor], Type),
("clang_getIncludedFile", [Cursor], c_object_p),
(
@@ -4269,25 +4271,25 @@ FUNCTION_LIST: list[LibFunc] = [
("clang_getTypePrettyPrinted", [Type, PrintingPolicy], _CXString),
("clang_getTypeSpelling", [Type], _CXString),
("clang_hashCursor", [Cursor], c_uint),
- ("clang_isAttribute", [CursorKind], bool),
+ ("clang_isAttribute", [CursorKind], c_uint),
("clang_getFullyQualifiedName", [Type, PrintingPolicy, c_uint], _CXString),
- ("clang_isConstQualifiedType", [Type], bool),
- ("clang_isCursorDefinition", [Cursor], bool),
- ("clang_isDeclaration", [CursorKind], bool),
- ("clang_isExpression", [CursorKind], bool),
- ("clang_isFileMultipleIncludeGuarded", [TranslationUnit, File], bool),
- ("clang_isFunctionTypeVariadic", [Type], bool),
- ("clang_isInvalid", [CursorKind], bool),
- ("clang_isPODType", [Type], bool),
- ("clang_isPreprocessing", [CursorKind], bool),
- ("clang_isReference", [CursorKind], bool),
- ("clang_isRestrictQualifiedType", [Type], bool),
- ("clang_isStatement", [CursorKind], bool),
- ("clang_isTranslationUnit", [CursorKind], bool),
- ("clang_isUnexposed", [CursorKind], bool),
- ("clang_isVirtualBase", [Cursor], bool),
- ("clang_isVolatileQualifiedType", [Type], bool),
- ("clang_isBeforeInTranslationUnit", [SourceLocation, SourceLocation], bool),
+ ("clang_isConstQualifiedType", [Type], c_uint),
+ ("clang_isCursorDefinition", [Cursor], c_uint),
+ ("clang_isDeclaration", [CursorKind], c_uint),
+ ("clang_isExpression", [CursorKind], c_uint),
+ ("clang_isFileMultipleIncludeGuarded", [TranslationUnit, File], c_uint),
+ ("clang_isFunctionTypeVariadic", [Type], c_uint),
+ ("clang_isInvalid", [CursorKind], c_uint),
+ ("clang_isPODType", [Type], c_uint),
+ ("clang_isPreprocessing", [CursorKind], c_uint),
+ ("clang_isReference", [CursorKind], c_uint),
+ ("clang_isRestrictQualifiedType", [Type], c_uint),
+ ("clang_isStatement", [CursorKind], c_uint),
+ ("clang_isTranslationUnit", [CursorKind], c_uint),
+ ("clang_isUnexposed", [CursorKind], c_uint),
+ ("clang_isVirtualBase", [Cursor], c_uint),
+ ("clang_isVolatileQualifiedType", [Type], c_uint),
+ ("clang_isBeforeInTranslationUnit", [SourceLocation, SourceLocation], c_uint),
(
"clang_parseTranslationUnit",
[Index, c_interop_string, c_void_p, c_int, c_void_p, c_int, c_int],
@@ -4314,11 +4316,11 @@ FUNCTION_LIST: list[LibFunc] = [
("clang_Cursor_getRawCommentText", [Cursor], _CXString),
("clang_Cursor_getOffsetOfField", [Cursor], c_longlong),
("clang_Cursor_getStorageClass", [Cursor], c_int),
- ("clang_Cursor_isAnonymous", [Cursor], bool),
- ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool),
- ("clang_Cursor_isBitField", [Cursor], bool),
+ ("clang_Cursor_isAnonymous", [Cursor], c_uint),
+ ("clang_Cursor_isAnonymousRecordDecl", [Cursor], c_uint),
+ ("clang_Cursor_isBitField", [Cursor], c_uint),
("clang_Cursor_isFunctionInlined", [Cursor], c_uint),
- ("clang_Location_isInSystemHeader", [SourceLocation], bool),
+ ("clang_Location_isInSystemHeader", [SourceLocation], c_int),
("clang_PrintingPolicy_dispose", [PrintingPolicy]),
("clang_PrintingPolicy_getProperty", [PrintingPolicy, c_int], c_uint),
("clang_PrintingPolicy_setProperty", [PrintingPolicy, c_int, c_uint]),
diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index 3d4d71a680d9..be3d0cfa2e65 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -200,16 +200,17 @@ endforeach()
if(FUCHSIA_SDK)
set(FUCHSIA_aarch64-unknown-fuchsia_NAME arm64)
+ set(FUCHSIA_arm-unknown-fuchsia_NAME arm)
set(FUCHSIA_i386-unknown-fuchsia_NAME x64)
set(FUCHSIA_x86_64-unknown-fuchsia_NAME x64)
set(FUCHSIA_riscv64-unknown-fuchsia_NAME riscv64)
- foreach(target i386-unknown-fuchsia;x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia)
+ foreach(target i386-unknown-fuchsia;x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;arm-unknown-fuchsia;riscv64-unknown-fuchsia)
set(FUCHSIA_${target}_COMPILER_FLAGS "--target=${target} -I${FUCHSIA_SDK}/pkg/sync/include -I${FUCHSIA_SDK}/pkg/fdio/include")
set(FUCHSIA_${target}_LINKER_FLAGS "-L${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/lib")
set(FUCHSIA_${target}_SYSROOT "${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/sysroot")
endforeach()
- foreach(target i386-unknown-fuchsia;x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia)
+ foreach(target i386-unknown-fuchsia;x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;arm-unknown-fuchsia;riscv64-unknown-fuchsia)
# Set the per-target builtins options.
list(APPEND BUILTIN_TARGETS "${target}")
set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "")
diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst
index 21e1a3652192..2c2131b01d36 100644
--- a/clang/docs/AddressSanitizer.rst
+++ b/clang/docs/AddressSanitizer.rst
@@ -159,7 +159,7 @@ eliminating this check (``-fsanitize-address-use-after-return=never``).
To summarize: ``-fsanitize-address-use-after-return=<mode>``
* ``never``: Completely disables detection of UAR errors (reduces code size).
- * ``runtime``: Adds the code for detection, but it can be disable via the
+ * ``runtime``: Adds the code for detection, but it can be disabled via the
runtime environment (``ASAN_OPTIONS=detect_stack_use_after_return=0``).
* ``always``: Enables detection of UAR errors in all cases. (reduces code
size, but not as much as ``never``).
@@ -239,7 +239,7 @@ from adding redzones around it and detecting out of bounds accesses.
AddressSanitizer also supports
``__attribute__((disable_sanitizer_instrumentation))``. This attribute
-works similar to ``__attribute__((no_sanitize("address")))``, but it also
+works similarly to ``__attribute__((no_sanitize("address")))``, but it also
prevents instrumentation performed by other sanitizers.
Suppressing Errors in Recompiled Code (Ignorelist)
@@ -305,7 +305,7 @@ Limitations
===========
* AddressSanitizer uses more real memory than a native run. Exact overhead
- depends on the allocations sizes. The smaller the allocations you make the
+ depends on the allocation sizes. The smaller the allocations you make the
bigger the overhead is.
* AddressSanitizer uses more stack memory. We have seen up to 3x increase.
* On 64-bit platforms AddressSanitizer maps (but not reserves) 16+ Terabytes of
diff --git a/clang/docs/BlockLanguageSpec.rst b/clang/docs/BlockLanguageSpec.rst
index 3632d566838a..0c3a000be5c8 100644
--- a/clang/docs/BlockLanguageSpec.rst
+++ b/clang/docs/BlockLanguageSpec.rst
@@ -279,7 +279,7 @@ copy. The net effect is that instance variables can be mutated.
The :block-term:`Block_copy` operator retains all objects held in
variables of automatic storage referenced within the Block expression
-(or form strong references if running under garbage collection).
+(or forms strong references if running under garbage collection).
Object variables of ``__block`` storage type are assumed to hold
normal pointers with no provision for retain and release messages.
diff --git a/clang/docs/BoundsSafety.rst b/clang/docs/BoundsSafety.rst
index 519c7b685e60..b0f77c38b28a 100644
--- a/clang/docs/BoundsSafety.rst
+++ b/clang/docs/BoundsSafety.rst
@@ -58,7 +58,7 @@ adopt, offering these properties that make it widely adoptable in practice:
* It has a relatively low adoption cost.
This document discusses the key designs of ``-fbounds-safety``. The document is
-subject to be actively updated with a more detailed specification.
+subject to active updates with a more detailed specification.
Programming Model
=================
@@ -574,7 +574,7 @@ When ``sizeof()`` takes a type name, the compiler doesn't apply an implicit
bounds annotation on the named pointer types. This means if a bounds annotation
is not specified, the evaluated pointer type is treated identically to a plain C
pointer type. Therefore, ``sizeof(int*)`` remains the same with or without
-``-fbounds-safety``. That said, programmers can explicitly add attribute to the
+``-fbounds-safety``. That said, programmers can explicitly add attributes to the
types, e.g., ``sizeof(int *__bidi_indexable)``, in which case the sizeof
evaluates to the size of type ``int *__bidi_indexable`` (the value equivalent to
``3 * sizeof(int*)``).
diff --git a/clang/docs/BoundsSafetyImplPlans.rst b/clang/docs/BoundsSafetyImplPlans.rst
index 34276c920f31..b374b0a0c68a 100644
--- a/clang/docs/BoundsSafetyImplPlans.rst
+++ b/clang/docs/BoundsSafetyImplPlans.rst
@@ -154,7 +154,7 @@ verify its bounds safety. The implementation relies on LLVM optimizations to
remove redundant run-time checks. Using this optimization strategy, if the
original source code already has bounds checks, the fewer additional checks
``-fbounds-safety`` will introduce. The LLVM ``ConstraintElimination`` pass is
-design to remove provable redundant checks (please check Florian Hahn’s
+designed to remove provable redundant checks (please check Florian Hahn’s
presentation in 2021 LLVM Dev Meeting and the implementation to learn more). In
the following example, ``-fbounds-safety`` implicitly adds the redundant bounds
checks that the optimizer can remove:
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 0b4a4849f6cc..94d6f0d27619 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -1673,6 +1673,16 @@ the configuration (without a prefix: ``Auto``).
int abcdef; // but this isn't
+ * ``bool AlignPPAndNotPP`` If comments following preprocessor directive should be aligned with
+ comments that don't.
+
+ .. code-block:: c++
+
+ true: false:
+ #define A // Comment vs. #define A // Comment
+ #define AB // Aligned #define AB // Aligned
+ int i; // Aligned int i; // Not aligned
+
.. _AllowAllArgumentsOnNextLine:
diff --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst
index 28f48fce6fe3..3637bdb84827 100644
--- a/clang/docs/ClangLinkerWrapper.rst
+++ b/clang/docs/ClangLinkerWrapper.rst
@@ -27,7 +27,7 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
.. code-block:: console
- USAGE: clang-linker-wrapper [options] -- <options to passed to the linker>
+ USAGE: clang-linker-wrapper [options] -- <options to pass to the linker>
OPTIONS:
--cuda-path=<dir> Set the system CUDA path
diff --git a/clang/docs/ClangNVLinkWrapper.rst b/clang/docs/ClangNVLinkWrapper.rst
index 2acdb054572f..28763b3891f5 100644
--- a/clang/docs/ClangNVLinkWrapper.rst
+++ b/clang/docs/ClangNVLinkWrapper.rst
@@ -10,7 +10,7 @@ Clang nvlink Wrapper
Introduction
============
-This tools works as a wrapper around the NVIDIA ``nvlink`` linker. The purpose
+This tool works as a wrapper around the NVIDIA ``nvlink`` linker. The purpose
of this wrapper is to provide an interface similar to the ``ld.lld`` linker
while still relying on NVIDIA's proprietary linker to produce the final output.
@@ -37,7 +37,7 @@ only for the linker wrapper will be forwarded to ``nvlink``.
--arch <value> Specify the 'sm_' name of the target architecture.
--cuda-path=<dir> Set the system CUDA path
--dry-run Print generated commands without running.
- --feature <value> Specify the '+ptx' freature to use for LTO.
+ --feature <value> Specify the '+ptx' feature to use for LTO.
-g Specify that this was a debug compile.
-help-hidden Display all available options
-help Display available options (--help-hidden for more)
diff --git a/clang/docs/ClangPlugins.rst b/clang/docs/ClangPlugins.rst
index 92e41fb5877f..3bd9e963d48a 100644
--- a/clang/docs/ClangPlugins.rst
+++ b/clang/docs/ClangPlugins.rst
@@ -150,7 +150,7 @@ passed to the plugin can.
-fplugin-arg-call_super_plugin-help \
test.cpp
-If your plugin name contains dashes, either rename the plugin or used the
+If your plugin name contains dashes, either rename the plugin or use the
cc1 command line options listed below.
diff --git a/clang/docs/ClangTools.rst b/clang/docs/ClangTools.rst
index 3216328bbb6a..b53c125f5b42 100644
--- a/clang/docs/ClangTools.rst
+++ b/clang/docs/ClangTools.rst
@@ -66,7 +66,7 @@ in a fast, command line interface. It can also accept flags to re-display the
diagnostics in different formats with different flags, suitable for use driving
an IDE or editor. Furthermore, it can be used in fixit-mode to directly apply
fixit-hints offered by clang. See :doc:`HowToSetupToolingForLLVM` for
-instructions on how to setup and used `clang-check`.
+instructions on how to setup and use `clang-check`.
``clang-format``
----------------
diff --git a/clang/docs/ConstantInterpreter.rst b/clang/docs/ConstantInterpreter.rst
index a71ee4b430a6..3b1bd4b3bda1 100644
--- a/clang/docs/ConstantInterpreter.rst
+++ b/clang/docs/ConstantInterpreter.rst
@@ -140,7 +140,7 @@ pointer goes out of scope, dead blocks are also deallocated.
The lifetime of blocks is managed through 3 methods stored in the
descriptor of the block:
-* **CtorFn**: initializes the metadata which is store in the block,
+* **CtorFn**: initializes the metadata which is stored in the block,
alongside actual data. Invokes the default constructors of objects
which are not trivial (``Pointer``, ``RealFP``, etc.)
diff --git a/clang/docs/ControlFlowIntegrity.rst b/clang/docs/ControlFlowIntegrity.rst
index baff9ab54ff2..cfe5bd836cac 100644
--- a/clang/docs/ControlFlowIntegrity.rst
+++ b/clang/docs/ControlFlowIntegrity.rst
@@ -135,7 +135,7 @@ Bad Cast Checking
This scheme checks that pointer casts are made to an object of the correct
dynamic type; that is, the dynamic type of the object must be a derived class
of the pointee type of the cast. The checks are currently only introduced
-where the class being casted to is a polymorphic class.
+where the class being cast to is a polymorphic class.
Bad casts are not in themselves control flow integrity violations, but they
can also create security vulnerabilities, and the implementation uses many
diff --git a/clang/docs/DataFlowSanitizer.rst b/clang/docs/DataFlowSanitizer.rst
index 5ff50b85dcdc..154229f9780b 100644
--- a/clang/docs/DataFlowSanitizer.rst
+++ b/clang/docs/DataFlowSanitizer.rst
@@ -243,7 +243,7 @@ labels of just ``v1`` and ``v2``.
This signature is the same when origin tracking is disabled - in this case
the dfsan_origin passed in it will always be 0.
- The callback will be called when a tained value reach stack/registers
+ The callback will be called when a tainted value reaches stack/registers
in the context of a function. Tainted values can reach a function:
* via the arguments of the function
* via the return value of a call that occurs in the function
diff --git a/clang/docs/HardwareAssistedAddressSanitizerDesign.rst b/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
index 014d10382e72..f2e76d6faa40 100644
--- a/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
+++ b/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
@@ -15,7 +15,7 @@ Introduction
tags every 8 bytes of the application memory with a 1 byte tag (using *shadow memory*),
uses *redzones* to find buffer-overflows and
*quarantine* to find use-after-free.
-The redzones, the quarantine, and, to a less extent, the shadow, are the
+The redzones, the quarantine, and, to a lesser extent, the shadow, are the
sources of AddressSanitizer's memory overhead.
See the `AddressSanitizer paper`_ for details.
diff --git a/clang/docs/JSONCompilationDatabase.rst b/clang/docs/JSONCompilationDatabase.rst
index f5432278bd4d..936ba11b087b 100644
--- a/clang/docs/JSONCompilationDatabase.rst
+++ b/clang/docs/JSONCompilationDatabase.rst
@@ -54,7 +54,7 @@ python bindings also support this (since clang 3.2); see
Format
======
-A compilation database is a JSON file, which consist of an array of
+A compilation database is a JSON file, which consists of an array of
"command objects", where each command object specifies one way a
translation unit is compiled in the project.
diff --git a/clang/docs/LibASTImporter.rst b/clang/docs/LibASTImporter.rst
index f5d40928d01e..e438de6624fd 100644
--- a/clang/docs/LibASTImporter.rst
+++ b/clang/docs/LibASTImporter.rst
@@ -35,12 +35,12 @@ Importing one AST node copies that node into the destination ``ASTContext``.
Why do we have to copy the node?
Isn't enough to insert the pointer to that node into the destination context?
One reason is that the "from" context may outlive the "to" context.
-Also, the Clang AST consider nodes (or certain properties of nodes) equivalent if they have the same address!
+Also, the Clang AST considers nodes (or certain properties of nodes) equivalent if they have the same address!
The import algorithm has to ensure that the structurally equivalent nodes in the different translation units are not getting duplicated in the merged AST.
E.g. if we include the definition of the vector template (``#include <vector>``) in two translation units, then their merged AST should have only one node which represents the template.
Also, we have to discover *one definition rule* (ODR) violations.
-For instance, if there is a class definition with the same name in both translation units, but one of the definition contains a different number of fields.
+For instance, if there is a class definition with the same name in both translation units, but one of the definitions contains a different number of fields.
So, we look up existing definitions, and then we check the structural equivalency on those nodes.
The following pseudo-code demonstrates the basics of the import mechanism:
diff --git a/clang/docs/LibASTMatchers.rst b/clang/docs/LibASTMatchers.rst
index 3b9f0a66db13..0aa7923fda9a 100644
--- a/clang/docs/LibASTMatchers.rst
+++ b/clang/docs/LibASTMatchers.rst
@@ -95,7 +95,7 @@ and flexibility.
``VariadicDynCastAllOfMatcher<Base, Derived>``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Those match all nodes of type *Base* if they can be dynamically casted to
+Those match all nodes of type *Base* if they can be dynamically cast to
*Derived*. The names of those matchers are nouns, which closely resemble
*Derived*. ``VariadicDynCastAllOfMatchers`` are the backbone of the matcher
hierarchy. Most often, your match expression will start with one of them, and
diff --git a/clang/docs/LibASTMatchersTutorial.rst b/clang/docs/LibASTMatchersTutorial.rst
index d2883688ebfa..e901eb9481fd 100644
--- a/clang/docs/LibASTMatchersTutorial.rst
+++ b/clang/docs/LibASTMatchersTutorial.rst
@@ -209,7 +209,7 @@ and traversal matchers to get from one kind of AST node to another. For
a complete list of AST matchers, take a look at the `AST Matcher
References <LibASTMatchersReference.html>`_
-All matcher that are nouns describe entities in the AST and can be
+All matchers that are nouns describe entities in the AST and can be
bound, so that they can be referred to whenever a match is found. To do
so, simply call the method ``bind`` on these matchers, e.g.:
diff --git a/clang/docs/LibFormat.rst b/clang/docs/LibFormat.rst
index 833f768c54a6..9450073b4841 100644
--- a/clang/docs/LibFormat.rst
+++ b/clang/docs/LibFormat.rst
@@ -3,7 +3,7 @@ LibFormat
=========
LibFormat is a library that implements automatic source code formatting based
-on Clang. This documents describes the LibFormat interface and design as well
+on Clang. This document describes the LibFormat interface and design as well
as some basic style discussions.
If you just want to use `clang-format` as a tool or integrated into an editor,
diff --git a/clang/docs/MatrixTypes.rst b/clang/docs/MatrixTypes.rst
index 32949c6c4352..b3a2c8cf5367 100644
--- a/clang/docs/MatrixTypes.rst
+++ b/clang/docs/MatrixTypes.rst
@@ -53,7 +53,7 @@ type of the *typedef* becomes a matrix type with the given dimensions and an
element type of the former underlying type.
If a declaration of a *typedef-name* has a ``matrix_type`` attribute, then all
-declaration of that *typedef-name* shall have a matrix_type attribute with the
+declarations of that *typedef-name* shall have a matrix_type attribute with the
same element type, number of rows, and number of columns.
Standard Conversions
diff --git a/clang/docs/MemorySanitizer.rst b/clang/docs/MemorySanitizer.rst
index 9f0d3f13a9d6..4f581427c36a 100644
--- a/clang/docs/MemorySanitizer.rst
+++ b/clang/docs/MemorySanitizer.rst
@@ -176,7 +176,7 @@ for `lifetime <https://eel.is/c++draft/basic.life#1>`_ definition.
This feature can be disabled with either:
-#. Pass addition Clang option ``-fno-sanitize-memory-use-after-dtor`` during
+#. Pass additional Clang option ``-fno-sanitize-memory-use-after-dtor`` during
compilation.
#. Set environment variable `MSAN_OPTIONS=poison_in_dtor=0` before running
the program.
diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst
index e45ee9ff9eac..0abb85c1d656 100644
--- a/clang/docs/Modules.rst
+++ b/clang/docs/Modules.rst
@@ -115,7 +115,7 @@ Objective-C provides syntax for importing a module via an *@import declaration*,
@import std;
-The ``@import`` declaration above imports the entire contents of the ``std`` module (which would contain, e.g., the entire C or C++ standard library) and make its API available within the current translation unit. To import only part of a module, one may use dot syntax to specific a particular submodule, e.g.,
+The ``@import`` declaration above imports the entire contents of the ``std`` module (which would contain, e.g., the entire C or C++ standard library) and make its API available within the current translation unit. To import only part of a module, one may use dot syntax to specify a particular submodule, e.g.,
.. parsed-literal::
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ae21c69b2d3c..3a4e1fce2511 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -69,6 +69,16 @@ Potentially Breaking Changes
call the member ``operator delete`` instead of the expected global
delete operator. The old behavior is retained under ``-fclang-abi-compat=21``
flag.
+- Trailing null statements in GNU statement expressions are no longer
+ ignored by Clang; they now result in a void type. Clang previously
+ matched GCC's behavior, which was recently clarified to be incorrect.
+
+ .. code-block:: c++
+
+ // The resulting type is 'void', not 'int'
+ void foo(void) {
+ return ({ 1;; });
+ }
C/C++ Language Potentially Breaking Changes
-------------------------------------------
@@ -447,6 +457,7 @@ Bug Fixes in This Version
- Fixed a failed assertion with empty filename in ``#embed`` directive. (#GH162951)
- Fixed a crash triggered by unterminated ``__has_embed``. (#GH162953)
- Accept empty enumerations in MSVC-compatible C mode. (#GH114402)
+- Fixed false-positive shadow diagnostics for lambdas in explicit object member functions. (#GH163731)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -513,6 +524,7 @@ Bug Fixes to C++ Support
- Fixed a template depth issue when parsing lambdas inside a type constraint. (#GH162092)
- Diagnose unresolved overload sets in non-dependent compound requirements. (#GH51246) (#GH97753)
- Fix a crash when extracting unavailable member type from alias in template deduction. (#GH165560)
+- Fix incorrect diagnostics for lambdas with init-captures inside braced initializers. (#GH163498)
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -560,6 +572,8 @@ X86 Support
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
+- More intrinsics for the following AArch64 instructions:
+ FCVTZ[US], FCVTN[US], FCVTM[US], FCVTP[US], FCVTA[US]
Android Support
^^^^^^^^^^^^^^^
@@ -639,6 +653,7 @@ clang-format
- Deprecate ``AlwaysBreak`` and ``BlockIndent`` suboptions from the
``AlignAfterOpenBracket`` option, and make ``AlignAfterOpenBracket`` a
``bool`` type.
+- Add ``AlignPPAndNotPP`` suboption to ``AlignTrailingComments``.
libclang
--------
diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index 427a9c51ece1..d364795a0581 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -149,7 +149,7 @@ class JSONNodeDumper
void writeIncludeStack(PresumedLoc Loc, bool JustFirst = false);
// Writes the attributes of a SourceLocation object without.
- void writeBareSourceLocation(SourceLocation Loc, bool IsSpelling);
+ void writeBareSourceLocation(SourceLocation Loc);
// Writes the attributes of a SourceLocation to JSON based on its presumed
// spelling location. If the given location represents a macro invocation,
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 76942f1a84f9..bec4066cc16e 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -1831,26 +1831,6 @@ public:
return const_reverse_body_iterator(body_begin());
}
- // Get the Stmt that StmtExpr would consider to be the result of this
- // compound statement. This is used by StmtExpr to properly emulate the GCC
- // compound expression extension, which ignores trailing NullStmts when
- // getting the result of the expression.
- // i.e. ({ 5;;; })
- // ^^ ignored
- // If we don't find something that isn't a NullStmt, just return the last
- // Stmt.
- Stmt *getStmtExprResult() {
- for (auto *B : llvm::reverse(body())) {
- if (!isa<NullStmt>(B))
- return B;
- }
- return body_back();
- }
-
- const Stmt *getStmtExprResult() const {
- return const_cast<CompoundStmt *>(this)->getStmtExprResult();
- }
-
SourceLocation getBeginLoc() const { return LBraceLoc; }
SourceLocation getEndLoc() const { return RBraceLoc; }
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
index 6a90aeb01e63..b9cad5340c94 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
@@ -16,6 +16,7 @@
#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "llvm/ADT/SmallVector.h"
@@ -23,6 +24,9 @@
#include <cstdint>
namespace clang::lifetimes::internal {
+
+using FactID = utils::ID<struct FactTag>;
+
/// An abstract base class for a single, atomic lifetime-relevant event.
class Fact {
@@ -48,6 +52,7 @@ public:
private:
Kind K;
+ FactID ID;
protected:
Fact(Kind K) : K(K) {}
@@ -56,6 +61,9 @@ public:
virtual ~Fact() = default;
Kind getKind() const { return K; }
+ void setID(FactID ID) { this->ID = ID; }
+ FactID getID() const { return ID; }
+
template <typename T> const T *getAs() const {
if (T::classof(this))
return static_cast<const T *>(this);
@@ -144,6 +152,7 @@ public:
class UseFact : public Fact {
const Expr *UseExpr;
+ OriginID OID;
// True if this use is a write operation (e.g., left-hand side of assignment).
// Write operations are exempted from use-after-free checks.
bool IsWritten = false;
@@ -151,12 +160,10 @@ class UseFact : public Fact {
public:
static bool classof(const Fact *F) { return F->getKind() == Kind::Use; }
- UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {}
+ UseFact(const Expr *UseExpr, OriginManager &OM)
+ : Fact(Kind::Use), UseExpr(UseExpr), OID(OM.get(*UseExpr)) {}
- OriginID getUsedOrigin(const OriginManager &OM) const {
- // TODO: Remove const cast and make OriginManager::get as const.
- return const_cast<OriginManager &>(OM).get(*UseExpr);
- }
+ OriginID getUsedOrigin() const { return OID; }
const Expr *getUseExpr() const { return UseExpr; }
void markAsWritten() { IsWritten = true; }
bool isWritten() const { return IsWritten; }
@@ -184,22 +191,26 @@ public:
class FactManager {
public:
+ void init(const CFG &Cfg) {
+ assert(BlockToFacts.empty() && "FactManager already initialized");
+ BlockToFacts.resize(Cfg.getNumBlockIDs());
+ }
+
llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
- auto It = BlockToFactsMap.find(B);
- if (It != BlockToFactsMap.end())
- return It->second;
- return {};
+ return BlockToFacts[B->getBlockID()];
}
void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
if (!NewFacts.empty())
- BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
+ BlockToFacts[B->getBlockID()].assign(NewFacts.begin(), NewFacts.end());
}
template <typename FactType, typename... Args>
FactType *createFact(Args &&...args) {
void *Mem = FactAllocator.Allocate<FactType>();
- return new (Mem) FactType(std::forward<Args>(args)...);
+ FactType *Res = new (Mem) FactType(std::forward<Args>(args)...);
+ Res->setID(NextFactID++);
+ return Res;
}
void dump(const CFG &Cfg, AnalysisDeclContext &AC) const;
@@ -215,16 +226,19 @@ public:
/// \note This is intended for testing only.
llvm::StringMap<ProgramPoint> getTestPoints() const;
+ unsigned getNumFacts() const { return NextFactID.Value; }
+
LoanManager &getLoanMgr() { return LoanMgr; }
const LoanManager &getLoanMgr() const { return LoanMgr; }
OriginManager &getOriginMgr() { return OriginMgr; }
const OriginManager &getOriginMgr() const { return OriginMgr; }
private:
+ FactID NextFactID{0};
LoanManager LoanMgr;
OriginManager OriginMgr;
- llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
- BlockToFactsMap;
+ /// Facts for each CFG block, indexed by block ID.
+ llvm::SmallVector<llvm::SmallVector<const Fact *>> BlockToFacts;
llvm::BumpPtrAllocator FactAllocator;
};
} // namespace clang::lifetimes::internal
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
index ba138b078b37..56b9010f41fa 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
@@ -74,6 +74,8 @@ public:
OriginID getOrCreate(const ValueDecl &D);
+ unsigned getNumOrigins() const { return NextOriginID.Value; }
+
void dump(OriginID OID, llvm::raw_ostream &OS) const;
private:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4388c09423a2..edff241a9873 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -3167,31 +3167,31 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in {
def kortestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
def kortestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
def kortestchi : X86Builtin<"int(unsigned short, unsigned short)">;
def kortestzhi : X86Builtin<"int(unsigned short, unsigned short)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kortestcsi : X86Builtin<"int(unsigned int, unsigned int)">;
def kortestzsi : X86Builtin<"int(unsigned int, unsigned int)">;
def kortestcdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
def kortestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in {
def ktestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
def ktestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
def ktestchi : X86Builtin<"int(unsigned short, unsigned short)">;
def ktestzhi : X86Builtin<"int(unsigned short, unsigned short)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def ktestcsi : X86Builtin<"int(unsigned int, unsigned int)">;
def ktestzsi : X86Builtin<"int(unsigned int, unsigned int)">;
def ktestcdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 83980e3ac35b..afd44a110bc7 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -312,6 +312,8 @@ def warn_drv_yc_multiple_inputs_clang_cl : Warning<
def warn_drv_potentially_misspelled_joined_argument : Warning<
"joined argument treated as '%0'; did you mean '%1'?">, InGroup<UnknownArgument>;
+def err_drv_too_many_actions: Error<
+ "only one action option is allowed. Got %0">;
def err_drv_invalid_value : Error<"invalid value '%1' in '%0'">;
def err_drv_invalid_int_value : Error<"invalid integral value '%1' in '%0'">;
def err_drv_invalid_value_with_suggestion : Error<
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f43707e51335..04f2e8d654fd 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8116,6 +8116,17 @@ def ext_gnu_ptr_func_arith : Extension<
"arithmetic on%select{ a|}0 pointer%select{|s}0 to%select{ the|}2 function "
"type%select{|s}2 %1%select{| and %3}2 is a GNU extension">,
InGroup<GNUPointerArith>;
+def ext_gnu_counted_by_void_ptr
+ : Extension<
+ "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' "
+ "on a pointer to void is a GNU extension, treated as "
+ "'%select{sized_by|sized_by|sized_by_or_null|sized_by_or_null}0'">,
+ InGroup<GNUPointerArith>;
+def note_gnu_counted_by_void_ptr_use_sized_by
+ : Note<"use "
+ "'%select{__sized_by|__sized_by|__sized_by_or_null|__sized_by_or_"
+ "null}0' "
+ "to suppress this warning">;
def err_readonly_message_assignment : Error<
"assigning to 'readonly' return result of an Objective-C message not allowed">;
def ext_c2y_increment_complex : Extension<
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ef196103035e..e91d7ce975d3 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1466,26 +1466,51 @@ def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Converts
def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">;
-def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
-def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
-def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
-def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
-def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
-def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
-def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
-def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
-def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">;
-def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
-def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">;
-def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
-def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
-def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
-def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
-def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
-def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
-def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
-def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
-def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
+
+def SCALAR_FCVTN_F32toSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTN_F32toUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S<)1", "Sd">;
+def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U<)1", "Sd">;
+def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S>)1", "Sf">;
+def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U>)1", "Sf">;
+def SCALAR_FCVTN_F64toSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTN_F64toUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
+
+def SCALAR_FCVTM_F32toSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTM_F32toUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S<)1", "Sd">;
+def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U<)1", "Sd">;
+def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S>)1", "Sf">;
+def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U>)1", "Sf">;
+def SCALAR_FCVTM_F64toSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTM_F64toUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
+
+def SCALAR_FCVTA_F32toSS : SInst<"vcvta_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTA_F32toUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S<)1", "Sd">;
+def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U<)1", "Sd">;
+def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S>)1", "Sf">;
+def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U>)1", "Sf">;
+def SCALAR_FCVTA_F64toSD : SInst<"vcvta_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTA_F64toUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
+
+def SCALAR_FCVTP_F32toSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTP_F32toUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S<)1", "Sd">;
+def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U<)1", "Sd">;
+def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S>)1", "Sf">;
+def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U>)1", "Sf">;
+def SCALAR_FCVTP_F64toSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTP_F64toUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
+
+def SCALAR_FCVTZ_F32toSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTZ_F32toUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S<)1", "Sd">;
+def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U<)1", "Sd">;
+def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S>)1", "Sf">;
+def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U>)1", "Sf">;
+def SCALAR_FCVTZ_F64toSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTZ_F64toUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Estimate
@@ -1896,6 +1921,14 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
}
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "f8f16mm,neon" in {
+ def VMMLA_F16_MF8 : VInst<"vmmla_f16_mf8_fpm", "(>F)(>F)..V", "Qm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "f8f32mm,neon" in {
+ def VMMLA_F32_MF8 : VInst<"vmmla_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+}
+
let TargetGuard = "i8mm,neon" in {
def VMMLA : SInst<"vmmla", "..(<<)(<<)", "QUiQi">;
def VUSMMLA : SInst<"vusmmla", "..(<<U)(<<)", "Qi">;
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index f246defc1fe8..b6f124f948b5 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -601,9 +601,19 @@ struct FormatStyle {
/// int abcdef; // but this isn't
/// \endcode
unsigned OverEmptyLines;
+ /// If comments following preprocessor directive should be aligned with
+ /// comments that don't.
+ /// \code
+ /// true: false:
+ /// #define A // Comment vs. #define A // Comment
+ /// #define AB // Aligned #define AB // Aligned
+ /// int i; // Aligned int i; // Not aligned
+ /// \endcode
+ bool AlignPPAndNotPP;
bool operator==(const TrailingCommentsAlignmentStyle &R) const {
- return Kind == R.Kind && OverEmptyLines == R.OverEmptyLines;
+ return Kind == R.Kind && OverEmptyLines == R.OverEmptyLines &&
+ AlignPPAndNotPP == R.AlignPPAndNotPP;
}
bool operator!=(const TrailingCommentsAlignmentStyle &R) const {
return !(*this == R);
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index eb8d7d111201..0470645a9e7a 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11309,9 +11309,6 @@ public:
InventedParameterInfos.end());
}
- /// The number of SFINAE diagnostics that have been trapped.
- unsigned NumSFINAEErrors;
-
ArrayRef<sema::FunctionScopeInfo *> getFunctionScopes() const {
return llvm::ArrayRef(FunctionScopes.begin() + FunctionScopesStart,
FunctionScopes.end());
@@ -12385,49 +12382,65 @@ public:
///@{
public:
- /// When true, access checking violations are treated as SFINAE
- /// failures rather than hard errors.
- bool AccessCheckingSFINAE;
+ class SFINAETrap;
+
+ struct SFINAEContextBase {
+ SFINAEContextBase(Sema &S, SFINAETrap *Cur)
+ : S(S), Prev(std::exchange(S.CurrentSFINAEContext, Cur)) {}
+
+ protected:
+ Sema &S;
+ ~SFINAEContextBase() { S.CurrentSFINAEContext = Prev; }
+
+ private:
+ SFINAETrap *Prev;
+ };
+
+ struct NonSFINAEContext : SFINAEContextBase {
+ NonSFINAEContext(Sema &S) : SFINAEContextBase(S, nullptr) {}
+ };
/// RAII class used to determine whether SFINAE has
/// trapped any errors that occur during template argument
/// deduction.
- class SFINAETrap {
- Sema &SemaRef;
- unsigned PrevSFINAEErrors;
- bool PrevInNonInstantiationSFINAEContext;
- bool PrevAccessCheckingSFINAE;
- bool PrevLastDiagnosticIgnored;
+ class SFINAETrap : SFINAEContextBase {
+ bool HasErrorOcurred = false;
+ bool WithAccessChecking = false;
+ bool PrevLastDiagnosticIgnored =
+ S.getDiagnostics().isLastDiagnosticIgnored();
+ sema::TemplateDeductionInfo *DeductionInfo = nullptr;
+
+ SFINAETrap(Sema &S, sema::TemplateDeductionInfo *Info,
+ bool WithAccessChecking)
+ : SFINAEContextBase(S, this), WithAccessChecking(WithAccessChecking),
+ DeductionInfo(Info) {}
public:
- /// \param ForValidityCheck If true, discard all diagnostics (from the
+ /// \param WithAccessChecking If true, discard all diagnostics (from the
/// immediate context) instead of adding them to the currently active
- /// \ref TemplateDeductionInfo (as returned by \ref isSFINAEContext).
- explicit SFINAETrap(Sema &SemaRef, bool ForValidityCheck = false)
- : SemaRef(SemaRef), PrevSFINAEErrors(SemaRef.NumSFINAEErrors),
- PrevInNonInstantiationSFINAEContext(
- SemaRef.InNonInstantiationSFINAEContext),
- PrevAccessCheckingSFINAE(SemaRef.AccessCheckingSFINAE),
- PrevLastDiagnosticIgnored(
- SemaRef.getDiagnostics().isLastDiagnosticIgnored()) {
- if (ForValidityCheck || !SemaRef.isSFINAEContext())
- SemaRef.InNonInstantiationSFINAEContext = true;
- SemaRef.AccessCheckingSFINAE = ForValidityCheck;
- }
+ /// \ref TemplateDeductionInfo.
+ explicit SFINAETrap(Sema &S, bool WithAccessChecking = false)
+ : SFINAETrap(S, /*Info=*/nullptr, WithAccessChecking) {}
+
+ SFINAETrap(Sema &S, sema::TemplateDeductionInfo &Info)
+ : SFINAETrap(S, &Info, /*WithAccessChecking=*/false) {}
~SFINAETrap() {
- SemaRef.NumSFINAEErrors = PrevSFINAEErrors;
- SemaRef.InNonInstantiationSFINAEContext =
- PrevInNonInstantiationSFINAEContext;
- SemaRef.AccessCheckingSFINAE = PrevAccessCheckingSFINAE;
- SemaRef.getDiagnostics().setLastDiagnosticIgnored(
- PrevLastDiagnosticIgnored);
+ S.getDiagnostics().setLastDiagnosticIgnored(PrevLastDiagnosticIgnored);
}
- /// Determine whether any SFINAE errors have been trapped.
- bool hasErrorOccurred() const {
- return SemaRef.NumSFINAEErrors > PrevSFINAEErrors;
+ SFINAETrap(const SFINAETrap &) = delete;
+ SFINAETrap &operator=(const SFINAETrap &) = delete;
+
+ sema::TemplateDeductionInfo *getDeductionInfo() const {
+ return DeductionInfo;
}
+
+ /// Determine whether any SFINAE errors have been trapped.
+ bool hasErrorOccurred() const { return HasErrorOcurred; }
+ void setErrorOccurred() { HasErrorOcurred = true; }
+
+ bool withAccessChecking() const { return WithAccessChecking; }
};
/// RAII class used to indicate that we are performing provisional
@@ -13148,9 +13161,6 @@ public:
PartialOrderingTTP,
} Kind;
- /// Was the enclosing context a non-instantiation SFINAE context?
- bool SavedInNonInstantiationSFINAEContext;
-
/// Whether we're substituting into constraints.
bool InConstraintSubstitution;
@@ -13195,22 +13205,15 @@ public:
return {TemplateArgs, NumTemplateArgs};
}
- /// The template deduction info object associated with the
- /// substitution or checking of explicit or deduced template arguments.
- sema::TemplateDeductionInfo *DeductionInfo;
-
/// The source range that covers the construct that cause
/// the instantiation, e.g., the template-id that causes a class
/// template instantiation.
SourceRange InstantiationRange;
CodeSynthesisContext()
- : Kind(TemplateInstantiation),
- SavedInNonInstantiationSFINAEContext(false),
- InConstraintSubstitution(false),
+ : Kind(TemplateInstantiation), InConstraintSubstitution(false),
InParameterMappingSubstitution(false), Entity(nullptr),
- Template(nullptr), TemplateArgs(nullptr), NumTemplateArgs(0),
- DeductionInfo(nullptr) {}
+ Template(nullptr), TemplateArgs(nullptr), NumTemplateArgs(0) {}
/// Determines whether this template is an actual instantiation
/// that should be counted toward the maximum instantiation depth.
@@ -13262,7 +13265,6 @@ public:
FunctionTemplateDecl *FunctionTemplate,
ArrayRef<TemplateArgument> TemplateArgs,
CodeSynthesisContext::SynthesisKind Kind,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange = SourceRange());
/// Note that we are instantiating as part of template
@@ -13270,7 +13272,6 @@ public:
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
TemplateDecl *Template,
ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange = SourceRange());
/// Note that we are instantiating as part of template
@@ -13279,7 +13280,6 @@ public:
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
ClassTemplatePartialSpecializationDecl *PartialSpec,
ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange = SourceRange());
/// Note that we are instantiating as part of template
@@ -13288,7 +13288,6 @@ public:
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
VarTemplatePartialSpecializationDecl *PartialSpec,
ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange = SourceRange());
/// Note that we are instantiating a default argument for a function
@@ -13334,7 +13333,6 @@ public:
/// concept.
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
ConstraintSubstitution, NamedDecl *Template,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange);
struct ConstraintNormalization {};
@@ -13354,7 +13352,6 @@ public:
/// a requirement of a requires expression.
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
concepts::Requirement *Req,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange = SourceRange());
/// \brief Note that we are checking the satisfaction of the constraint
@@ -13366,7 +13363,6 @@ public:
/// \brief Note that we are checking a requires clause.
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
const RequiresExpr *E,
- sema::TemplateDeductionInfo &DeductionInfo,
SourceRange InstantiationRange);
struct BuildingDeductionGuidesTag {};
@@ -13399,8 +13395,7 @@ public:
SourceLocation PointOfInstantiation,
SourceRange InstantiationRange, Decl *Entity,
NamedDecl *Template = nullptr,
- ArrayRef<TemplateArgument> TemplateArgs = {},
- sema::TemplateDeductionInfo *DeductionInfo = nullptr);
+ ArrayRef<TemplateArgument> TemplateArgs = {});
InstantiatingTemplate(const InstantiatingTemplate &) = delete;
@@ -13541,12 +13536,7 @@ public:
/// recent visible declaration of that namespace.
llvm::DenseMap<NamedDecl *, NamedDecl *> VisibleNamespaceCache;
- /// Whether we are in a SFINAE context that is not associated with
- /// template instantiation.
- ///
- /// This is used when setting up a SFINAE trap (\c see SFINAETrap) outside
- /// of a template instantiation or template argument deduction.
- bool InNonInstantiationSFINAEContext;
+ SFINAETrap *CurrentSFINAEContext = nullptr;
/// The number of \p CodeSynthesisContexts that are not template
/// instantiations and, therefore, should not be counted as part of the
@@ -13617,15 +13607,13 @@ public:
PrintInstantiationStack(getDefaultDiagFunc());
}
- /// Determines whether we are currently in a context where
- /// template argument substitution failures are not considered
- /// errors.
- ///
- /// \returns An empty \c Optional if we're not in a SFINAE context.
- /// Otherwise, contains a pointer that, if non-NULL, contains the nearest
- /// template-deduction context object, which can be used to capture
- /// diagnostics that will be suppressed.
- std::optional<sema::TemplateDeductionInfo *> isSFINAEContext() const;
+ /// Returns a pointer to the current SFINAE context, if any.
+ [[nodiscard]] SFINAETrap *getSFINAEContext() const {
+ return CurrentSFINAEContext;
+ }
+ [[nodiscard]] bool isSFINAEContext() const {
+ return CurrentSFINAEContext != nullptr;
+ }
/// Perform substitution on the type T with a given set of template
/// arguments.
@@ -14637,7 +14625,8 @@ public:
ArrayRef<UnexpandedParameterPack> Unexpanded,
const MultiLevelTemplateArgumentList &TemplateArgs,
bool FailOnPackProducingTemplates, bool &ShouldExpand,
- bool &RetainExpansion, UnsignedOrNone &NumExpansions);
+ bool &RetainExpansion, UnsignedOrNone &NumExpansions,
+ bool Diagnose = true);
/// Determine the number of arguments in the given pack expansion
/// type.
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 84f7e6287609..7353cbce5558 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4175,7 +4175,7 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
StmtExprScope<Emitter> SS(this);
const CompoundStmt *CS = E->getSubStmt();
- const Stmt *Result = CS->getStmtExprResult();
+ const Stmt *Result = CS->body_back();
for (const Stmt *S : CS->body()) {
if (S != Result) {
if (!this->visitStmt(S))
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9991e365addb..0ef130c0a55d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3831,6 +3831,42 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return Result;
});
+ case clang::X86::BI__builtin_ia32_ktestcqi:
+ case clang::X86::BI__builtin_ia32_ktestchi:
+ case clang::X86::BI__builtin_ia32_ktestcsi:
+ case clang::X86::BI__builtin_ia32_ktestcdi:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ return APInt(sizeof(unsigned char) * 8, (~A & B) == 0);
+ });
+
+ case clang::X86::BI__builtin_ia32_ktestzqi:
+ case clang::X86::BI__builtin_ia32_ktestzhi:
+ case clang::X86::BI__builtin_ia32_ktestzsi:
+ case clang::X86::BI__builtin_ia32_ktestzdi:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ return APInt(sizeof(unsigned char) * 8, (A & B) == 0);
+ });
+
+ case clang::X86::BI__builtin_ia32_kortestcqi:
+ case clang::X86::BI__builtin_ia32_kortestchi:
+ case clang::X86::BI__builtin_ia32_kortestcsi:
+ case clang::X86::BI__builtin_ia32_kortestcdi:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0);
+ });
+
+ case clang::X86::BI__builtin_ia32_kortestzqi:
+ case clang::X86::BI__builtin_ia32_kortestzhi:
+ case clang::X86::BI__builtin_ia32_kortestzsi:
+ case clang::X86::BI__builtin_ia32_kortestzdi:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ return APInt(sizeof(unsigned char) * 8, (A | B) == 0);
+ });
+
case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64:
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index 4d34e0b0a9b4..c468303efea7 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -197,7 +197,8 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) {
// global variable and points to the block we just created.
if (auto DummyIt = DummyVariables.find(Redecl);
DummyIt != DummyVariables.end()) {
- assert(!Globals[DummyIt->second]->block()->hasPointers());
+ Global *Dummy = Globals[DummyIt->second];
+ Dummy->block()->movePointersTo(NewGlobal->block());
Globals[DummyIt->second] = NewGlobal;
DummyVariables.erase(DummyIt);
}
diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index e0cf0deb12bd..638080ea781a 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -178,7 +178,7 @@ ExprDependence clang::computeDependence(StmtExpr *E, unsigned TemplateDepth) {
auto D = toExprDependenceForImpliedType(E->getType()->getDependence());
// Propagate dependence of the result.
if (const auto *CompoundExprResult =
- dyn_cast_or_null<ValueStmt>(E->getSubStmt()->getStmtExprResult()))
+ dyn_cast_or_null<ValueStmt>(E->getSubStmt()->body_back()))
if (const Expr *ResultExpr = CompoundExprResult->getExprStmt())
D |= ResultExpr->getDependence();
// Note: we treat a statement-expression in a dependent context as always
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4f6f52bf839e..972d9fe3b5e4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16244,6 +16244,54 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(Val, E);
}
+ case clang::X86::BI__builtin_ia32_ktestcqi:
+ case clang::X86::BI__builtin_ia32_ktestchi:
+ case clang::X86::BI__builtin_ia32_ktestcsi:
+ case clang::X86::BI__builtin_ia32_ktestcdi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ return Success((~A & B) == 0, E);
+ }
+
+ case clang::X86::BI__builtin_ia32_ktestzqi:
+ case clang::X86::BI__builtin_ia32_ktestzhi:
+ case clang::X86::BI__builtin_ia32_ktestzsi:
+ case clang::X86::BI__builtin_ia32_ktestzdi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ return Success((A & B) == 0, E);
+ }
+
+ case clang::X86::BI__builtin_ia32_kortestcqi:
+ case clang::X86::BI__builtin_ia32_kortestchi:
+ case clang::X86::BI__builtin_ia32_kortestcsi:
+ case clang::X86::BI__builtin_ia32_kortestcdi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ return Success(~(A | B) == 0, E);
+ }
+
+ case clang::X86::BI__builtin_ia32_kortestzqi:
+ case clang::X86::BI__builtin_ia32_kortestzhi:
+ case clang::X86::BI__builtin_ia32_kortestzsi:
+ case clang::X86::BI__builtin_ia32_kortestzdi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ return Success((A | B) == 0, E);
+ }
+
case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 9f4dba9f14fa..89abf888cbbb 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -272,15 +272,13 @@ void JSONNodeDumper::writeIncludeStack(PresumedLoc Loc, bool JustFirst) {
JOS.attributeEnd();
}
-void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc,
- bool IsSpelling) {
+void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc) {
PresumedLoc Presumed = SM.getPresumedLoc(Loc);
- unsigned ActualLine = IsSpelling ? SM.getSpellingLineNumber(Loc)
- : SM.getExpansionLineNumber(Loc);
- StringRef ActualFile = SM.getBufferName(Loc);
-
if (Presumed.isValid()) {
- JOS.attribute("offset", SM.getDecomposedLoc(Loc).second);
+ StringRef ActualFile = SM.getBufferName(Loc);
+ auto [FID, FilePos] = SM.getDecomposedLoc(Loc);
+ unsigned ActualLine = SM.getLineNumber(FID, FilePos);
+ JOS.attribute("offset", FilePos);
if (LastLocFilename != ActualFile) {
JOS.attribute("file", ActualFile);
JOS.attribute("line", ActualLine);
@@ -318,18 +316,17 @@ void JSONNodeDumper::writeSourceLocation(SourceLocation Loc) {
if (Expansion != Spelling) {
// If the expansion and the spelling are different, output subobjects
// describing both locations.
- JOS.attributeObject("spellingLoc", [Spelling, this] {
- writeBareSourceLocation(Spelling, /*IsSpelling*/ true);
- });
+ JOS.attributeObject(
+ "spellingLoc", [Spelling, this] { writeBareSourceLocation(Spelling); });
JOS.attributeObject("expansionLoc", [Expansion, Loc, this] {
- writeBareSourceLocation(Expansion, /*IsSpelling*/ false);
+ writeBareSourceLocation(Expansion);
// If there is a macro expansion, add extra information if the interesting
// bit is the macro arg expansion.
if (SM.isMacroArgExpansion(Loc))
JOS.attribute("isMacroArgExpansion", true);
});
} else
- writeBareSourceLocation(Spelling, /*IsSpelling*/ true);
+ writeBareSourceLocation(Spelling);
}
void JSONNodeDumper::writeSourceRange(SourceRange R) {
diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h
index 2f7bcb6e5dc8..de821bb17eb6 100644
--- a/clang/lib/Analysis/LifetimeSafety/Dataflow.h
+++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h
@@ -67,10 +67,10 @@ private:
llvm::DenseMap<const CFGBlock *, Lattice> InStates;
/// The dataflow state after a basic block is processed.
llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
- /// The dataflow state at a Program Point.
+ /// Dataflow state at each program point, indexed by Fact ID.
/// In a forward analysis, this is the state after the Fact at that point has
/// been applied, while in a backward analysis, it is the state before.
- llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
+ llvm::SmallVector<Lattice> PointToState;
static constexpr bool isForward() { return Dir == Direction::Forward; }
@@ -86,6 +86,8 @@ public:
Derived &D = static_cast<Derived &>(*this);
llvm::TimeTraceScope Time(D.getAnalysisName());
+ PointToState.resize(FactMgr.getNumFacts());
+
using Worklist =
std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist,
BackwardDataflowWorklist>;
@@ -116,7 +118,9 @@ public:
}
protected:
- Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
+ Lattice getState(ProgramPoint P) const {
+ return PointToState[P->getID().Value];
+ }
std::optional<Lattice> getInState(const CFGBlock *B) const {
auto It = InStates.find(B);
@@ -144,12 +148,12 @@ private:
if constexpr (isForward()) {
for (const Fact *F : Facts) {
State = transferFact(State, F);
- PerPointStates[F] = State;
+ PointToState[F->getID().Value] = State;
}
} else {
for (const Fact *F : llvm::reverse(Facts)) {
// In backward analysis, capture the state before applying the fact.
- PerPointStates[F] = State;
+ PointToState[F->getID().Value] = State;
State = transferFact(State, F);
}
}
diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
index 1aea64f86436..4a4172fe55bf 100644
--- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
@@ -53,7 +53,7 @@ void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &,
void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &,
const OriginManager &OM) const {
OS << "Use (";
- OM.dump(getUsedOrigin(OM), OS);
+ OM.dump(getUsedOrigin(), OS);
OS << ", " << (isWritten() ? "Write" : "Read") << ")\n";
}
@@ -64,8 +64,8 @@ void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &,
llvm::StringMap<ProgramPoint> FactManager::getTestPoints() const {
llvm::StringMap<ProgramPoint> AnnotationToPointMap;
- for (const CFGBlock *Block : BlockToFactsMap.keys()) {
- for (const Fact *F : getFacts(Block)) {
+ for (const auto &BlockFacts : BlockToFacts) {
+ for (const Fact *F : BlockFacts) {
if (const auto *TPF = F->getAs<TestPointFact>()) {
StringRef PointName = TPF->getAnnotation();
assert(AnnotationToPointMap.find(PointName) ==
@@ -88,12 +88,9 @@ void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
// Print blocks in the order as they appear in code for a stable ordering.
for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
- auto It = BlockToFactsMap.find(B);
- if (It != BlockToFactsMap.end()) {
- for (const Fact *F : It->second) {
- llvm::dbgs() << " ";
- F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
- }
+ for (const Fact *F : getFacts(B)) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
}
llvm::dbgs() << " End of Block\n";
}
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 9b68de107e31..bec8e1dabb0b 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -333,7 +333,7 @@ void FactsGenerator::handleAssignment(const Expr *LHSExpr,
// (e.g. on the left-hand side of an assignment).
void FactsGenerator::handleUse(const DeclRefExpr *DRE) {
if (isPointerType(DRE->getType())) {
- UseFact *UF = FactMgr.createFact<UseFact>(DRE);
+ UseFact *UF = FactMgr.createFact<UseFact>(DRE, FactMgr.getOriginMgr());
CurrentBlockFacts.push_back(UF);
assert(!UseFacts.contains(DRE));
UseFacts[DRE] = UF;
diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp
index 00c7ed90503e..a51ba4280f28 100644
--- a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp
@@ -41,6 +41,7 @@ void LifetimeSafetyAnalysis::run() {
const CFG &Cfg = *AC.getCFG();
DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
/*ShowColors=*/true));
+ FactMgr.init(Cfg);
FactsGenerator FactGen(FactMgr, AC);
FactGen.run();
diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
index cddb3f3ce4c1..59f594e50fb4 100644
--- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
@@ -111,7 +111,7 @@ public:
/// dominates this program point. A write operation kills the liveness of
/// the origin since it overwrites the value.
Lattice transfer(Lattice In, const UseFact &UF) {
- OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr());
+ OriginID OID = UF.getUsedOrigin();
// Write kills liveness.
if (UF.isWritten())
return Lattice(Factory.remove(In.LiveOrigins, OID));
diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp
index 387097e705f9..0e6c194123df 100644
--- a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp
@@ -5,36 +5,114 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
-#include "Dataflow.h"
+#include <cassert>
#include <memory>
+#include "Dataflow.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
namespace clang::lifetimes::internal {
+
+// Prepass to find persistent origins. An origin is persistent if it is
+// referenced in more than one basic block.
+static llvm::BitVector computePersistentOrigins(const FactManager &FactMgr,
+ const CFG &C) {
+ llvm::TimeTraceScope("ComputePersistentOrigins");
+ unsigned NumOrigins = FactMgr.getOriginMgr().getNumOrigins();
+ llvm::BitVector PersistentOrigins(NumOrigins);
+
+ llvm::SmallVector<const CFGBlock *> OriginToFirstSeenBlock(NumOrigins,
+ nullptr);
+ for (const CFGBlock *B : C) {
+ for (const Fact *F : FactMgr.getFacts(B)) {
+ auto CheckOrigin = [&](OriginID OID) {
+ if (PersistentOrigins.test(OID.Value))
+ return;
+ auto &FirstSeenBlock = OriginToFirstSeenBlock[OID.Value];
+ if (FirstSeenBlock == nullptr)
+ FirstSeenBlock = B;
+ if (FirstSeenBlock != B) {
+ // We saw this origin in more than one block.
+ PersistentOrigins.set(OID.Value);
+ }
+ };
+
+ switch (F->getKind()) {
+ case Fact::Kind::Issue:
+ CheckOrigin(F->getAs<IssueFact>()->getOriginID());
+ break;
+ case Fact::Kind::OriginFlow: {
+ const auto *OF = F->getAs<OriginFlowFact>();
+ CheckOrigin(OF->getDestOriginID());
+ CheckOrigin(OF->getSrcOriginID());
+ break;
+ }
+ case Fact::Kind::ReturnOfOrigin:
+ CheckOrigin(F->getAs<ReturnOfOriginFact>()->getReturnedOriginID());
+ break;
+ case Fact::Kind::Use:
+ CheckOrigin(F->getAs<UseFact>()->getUsedOrigin());
+ break;
+ case Fact::Kind::Expire:
+ case Fact::Kind::TestPoint:
+ break;
+ }
+ }
+ }
+ return PersistentOrigins;
+}
+
namespace {
+
/// Represents the dataflow lattice for loan propagation.
///
/// This lattice tracks which loans each origin may hold at a given program
/// point.The lattice has a finite height: An origin's loan set is bounded by
/// the total number of loans in the function.
-/// TODO(opt): To reduce the lattice size, propagate origins of declarations,
-/// not expressions, because expressions are not visible across blocks.
struct Lattice {
/// The map from an origin to the set of loans it contains.
- OriginLoanMap Origins = OriginLoanMap(nullptr);
-
- explicit Lattice(const OriginLoanMap &S) : Origins(S) {}
+ /// Origins that appear in multiple blocks. Participates in join operations.
+ OriginLoanMap PersistentOrigins = OriginLoanMap(nullptr);
+ /// Origins confined to a single block. Discarded at block boundaries.
+ OriginLoanMap BlockLocalOrigins = OriginLoanMap(nullptr);
+
+ explicit Lattice(const OriginLoanMap &Persistent,
+ const OriginLoanMap &BlockLocal)
+ : PersistentOrigins(Persistent), BlockLocalOrigins(BlockLocal) {}
Lattice() = default;
bool operator==(const Lattice &Other) const {
- return Origins == Other.Origins;
+ return PersistentOrigins == Other.PersistentOrigins &&
+ BlockLocalOrigins == Other.BlockLocalOrigins;
}
bool operator!=(const Lattice &Other) const { return !(*this == Other); }
void dump(llvm::raw_ostream &OS) const {
OS << "LoanPropagationLattice State:\n";
- if (Origins.isEmpty())
+ OS << " Persistent Origins:\n";
+ if (PersistentOrigins.isEmpty())
OS << " <empty>\n";
- for (const auto &Entry : Origins) {
+ for (const auto &Entry : PersistentOrigins) {
+ if (Entry.second.isEmpty())
+ OS << " Origin " << Entry.first << " contains no loans\n";
+ for (const LoanID &LID : Entry.second)
+ OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
+ }
+ OS << " Block-Local Origins:\n";
+ if (BlockLocalOrigins.isEmpty())
+ OS << " <empty>\n";
+ for (const auto &Entry : BlockLocalOrigins) {
if (Entry.second.isEmpty())
OS << " Origin " << Entry.first << " contains no loans\n";
for (const LoanID &LID : Entry.second)
@@ -50,7 +128,8 @@ public:
OriginLoanMap::Factory &OriginLoanMapFactory,
LoanSet::Factory &LoanSetFactory)
: DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory),
- LoanSetFactory(LoanSetFactory) {}
+ LoanSetFactory(LoanSetFactory),
+ PersistentOrigins(computePersistentOrigins(F, C)) {}
using Base::transfer;
@@ -59,10 +138,10 @@ public:
Lattice getInitialState() { return Lattice{}; }
/// Merges two lattices by taking the union of loans for each origin.
- // TODO(opt): Keep the state small by removing origins which become dead.
+ /// Only persistent origins are joined; block-local origins are discarded.
Lattice join(Lattice A, Lattice B) {
OriginLoanMap JoinedOrigins = utils::join(
- A.Origins, B.Origins, OriginLoanMapFactory,
+ A.PersistentOrigins, B.PersistentOrigins, OriginLoanMapFactory,
[&](const LoanSet *S1, const LoanSet *S2) {
assert((S1 || S2) && "unexpectedly merging 2 empty sets");
if (!S1)
@@ -74,16 +153,15 @@ public:
// Asymmetric join is a performance win. For origins present only on one
// branch, the loan set can be carried over as-is.
utils::JoinKind::Asymmetric);
- return Lattice(JoinedOrigins);
+ return Lattice(JoinedOrigins, OriginLoanMapFactory.getEmptyMap());
}
/// A new loan is issued to the origin. Old loans are erased.
Lattice transfer(Lattice In, const IssueFact &F) {
OriginID OID = F.getOriginID();
LoanID LID = F.getLoanID();
- return Lattice(OriginLoanMapFactory.add(
- In.Origins, OID,
- LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID)));
+ LoanSet NewLoans = LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID);
+ return setLoans(In, OID, NewLoans);
}
/// A flow from source to destination. If `KillDest` is true, this replaces
@@ -98,7 +176,7 @@ public:
LoanSet SrcLoans = getLoans(In, SrcOID);
LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory);
- return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans));
+ return setLoans(In, DestOID, MergedLoans);
}
LoanSet getLoans(OriginID OID, ProgramPoint P) const {
@@ -106,14 +184,33 @@ public:
}
private:
+ /// Returns true if the origin is persistent (referenced in multiple blocks).
+ bool isPersistent(OriginID OID) const {
+ return PersistentOrigins.test(OID.Value);
+ }
+
+ Lattice setLoans(Lattice L, OriginID OID, LoanSet Loans) {
+ if (isPersistent(OID))
+ return Lattice(OriginLoanMapFactory.add(L.PersistentOrigins, OID, Loans),
+ L.BlockLocalOrigins);
+ return Lattice(L.PersistentOrigins,
+ OriginLoanMapFactory.add(L.BlockLocalOrigins, OID, Loans));
+ }
+
LoanSet getLoans(Lattice L, OriginID OID) const {
- if (auto *Loans = L.Origins.lookup(OID))
+ const OriginLoanMap *Map =
+ isPersistent(OID) ? &L.PersistentOrigins : &L.BlockLocalOrigins;
+ if (auto *Loans = Map->lookup(OID))
return *Loans;
return LoanSetFactory.getEmptySet();
}
OriginLoanMap::Factory &OriginLoanMapFactory;
LoanSet::Factory &LoanSetFactory;
+ /// Boolean vector indexed by origin ID. If true, the origin appears in
+ /// multiple basic blocks and must participate in join operations. If false,
+ /// the origin is block-local and can be discarded at block boundaries.
+ llvm::BitVector PersistentOrigins;
};
} // namespace
diff --git a/clang/lib/Basic/BuiltinTargetFeatures.h b/clang/lib/Basic/BuiltinTargetFeatures.h
index 9754acda2a68..bf227a17f786 100644
--- a/clang/lib/Basic/BuiltinTargetFeatures.h
+++ b/clang/lib/Basic/BuiltinTargetFeatures.h
@@ -20,7 +20,7 @@ using llvm::StringRef;
namespace clang {
namespace Builtin {
/// TargetFeatures - This class is used to check whether the builtin function
-/// has the required tagert specific features. It is able to support the
+/// has the required target specific features. It is able to support the
/// combination of ','(and), '|'(or), and '()'. By default, the priority of
/// ',' is higher than that of '|' .
/// E.g:
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index 1eb7199ce6df..7bb8c2153056 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -66,7 +66,7 @@ static mlir::LogicalResult emitStmtWithResult(CIRGenFunction &cgf,
mlir::LogicalResult CIRGenFunction::emitCompoundStmtWithoutScope(
const CompoundStmt &s, Address *lastValue, AggValueSlot slot) {
mlir::LogicalResult result = mlir::success();
- const Stmt *exprResult = s.getStmtExprResult();
+ const Stmt *exprResult = s.body_back();
assert((!lastValue || (lastValue && exprResult)) &&
"If lastValue is not null then the CompoundStmt must have a "
"StmtExprResult");
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bbcee34b384c..0a2ea416e5e4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1211,14 +1211,10 @@ llvm::Value *CodeGenFunction::emitCountedByPointerSize(
getContext().getTypeSizeInChars(ElementTy->getPointeeType());
if (ElementSize.isZero()) {
- // This might be a __sized_by on a 'void *', which counts bytes, not
- // elements.
- auto *CAT = ElementTy->getAs<CountAttributedType>();
- if (!CAT || (CAT->getKind() != CountAttributedType::SizedBy &&
- CAT->getKind() != CountAttributedType::SizedByOrNull))
- // Okay, not sure what it is now.
- // FIXME: Should this be an assert?
- return std::optional<CharUnits>();
+ // This might be a __sized_by (or __counted_by) on a
+ // 'void *', which counts bytes, not elements.
+ [[maybe_unused]] auto *CAT = ElementTy->getAs<CountAttributedType>();
+ assert(CAT && "must have an CountAttributedType");
ElementSize = CharUnits::One();
}
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index fdc1a11f6c55..36be3295950b 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -582,48 +582,45 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S,
bool GetLast,
AggValueSlot AggSlot) {
- const Stmt *ExprResult = S.getStmtExprResult();
- assert((!GetLast || (GetLast && ExprResult)) &&
- "If GetLast is true then the CompoundStmt must have a StmtExprResult");
+ for (CompoundStmt::const_body_iterator I = S.body_begin(),
+ E = S.body_end() - GetLast;
+ I != E; ++I)
+ EmitStmt(*I);
Address RetAlloca = Address::invalid();
-
- for (auto *CurStmt : S.body()) {
- if (GetLast && ExprResult == CurStmt) {
- // We have to special case labels here. They are statements, but when put
- // at the end of a statement expression, they yield the value of their
- // subexpression. Handle this by walking through all labels we encounter,
- // emitting them before we evaluate the subexpr.
- // Similar issues arise for attributed statements.
- while (!isa<Expr>(ExprResult)) {
- if (const auto *LS = dyn_cast<LabelStmt>(ExprResult)) {
- EmitLabel(LS->getDecl());
- ExprResult = LS->getSubStmt();
- } else if (const auto *AS = dyn_cast<AttributedStmt>(ExprResult)) {
- // FIXME: Update this if we ever have attributes that affect the
- // semantics of an expression.
- ExprResult = AS->getSubStmt();
- } else {
- llvm_unreachable("unknown value statement");
- }
+ if (GetLast) {
+ // We have to special case labels here. They are statements, but when put
+ // at the end of a statement expression, they yield the value of their
+ // subexpression. Handle this by walking through all labels we encounter,
+ // emitting them before we evaluate the subexpr.
+ // Similar issues arise for attributed statements.
+ const Stmt *LastStmt = S.body_back();
+ while (!isa<Expr>(LastStmt)) {
+ if (const auto *LS = dyn_cast<LabelStmt>(LastStmt)) {
+ EmitLabel(LS->getDecl());
+ LastStmt = LS->getSubStmt();
+ } else if (const auto *AS = dyn_cast<AttributedStmt>(LastStmt)) {
+ // FIXME: Update this if we ever have attributes that affect the
+ // semantics of an expression.
+ LastStmt = AS->getSubStmt();
+ } else {
+ llvm_unreachable("unknown value statement");
}
+ }
- EnsureInsertPoint();
+ EnsureInsertPoint();
- const Expr *E = cast<Expr>(ExprResult);
- QualType ExprTy = E->getType();
- if (hasAggregateEvaluationKind(ExprTy)) {
- EmitAggExpr(E, AggSlot);
- } else {
- // We can't return an RValue here because there might be cleanups at
- // the end of the StmtExpr. Because of that, we have to emit the result
- // here into a temporary alloca.
- RetAlloca = CreateMemTemp(ExprTy);
- EmitAnyExprToMem(E, RetAlloca, Qualifiers(),
- /*IsInit*/ false);
- }
+ const Expr *E = cast<Expr>(LastStmt);
+ QualType ExprTy = E->getType();
+ if (hasAggregateEvaluationKind(ExprTy)) {
+ EmitAggExpr(E, AggSlot);
} else {
- EmitStmt(CurStmt);
+ // We can't return an RValue here because there might be cleanups at
+ // the end of the StmtExpr. Because of that, we have to emit the result
+ // here into a temporary alloca.
+ RetAlloca = CreateMemTemp(ExprTy);
+ EmitAnyExprToMem(E, RetAlloca, Qualifiers(),
+ /*IsInit*/ false);
}
}
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 8f095649f87c..06d7380b4e37 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -58,9 +58,10 @@ enum PGOHashVersion : unsigned {
PGO_HASH_V1,
PGO_HASH_V2,
PGO_HASH_V3,
+ PGO_HASH_V4,
// Keep this set to the latest hash version.
- PGO_HASH_LATEST = PGO_HASH_V3
+ PGO_HASH_LATEST = PGO_HASH_V4
};
namespace {
@@ -152,7 +153,9 @@ static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
return PGO_HASH_V1;
if (PGOReader->getVersion() <= 5)
return PGO_HASH_V2;
- return PGO_HASH_V3;
+ if (PGOReader->getVersion() <= 12)
+ return PGO_HASH_V3;
+ return PGO_HASH_V4;
}
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
@@ -1099,6 +1102,8 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) {
assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
NumRegionCounters = Walker.NextCounter;
FunctionHash = Walker.Hash.finalize();
+ if (HashVersion >= PGO_HASH_V4)
+ FunctionHash &= llvm::NamedInstrProfRecord::FUNC_HASH_MASK;
}
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 15fa78ddba71..d4b0b81d3d87 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -590,6 +590,7 @@ struct ARMVectorIntrinsicInfo {
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
TypeModifier }
+// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
NEONMAP0(splat_lane_v),
@@ -1217,35 +1218,55 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP0(vcvth_bf16_f32),
+ NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
@@ -1446,6 +1467,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
+// clang-format on
// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
@@ -7624,6 +7646,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
}
+ case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
+ return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
+ {llvm::FixedVectorType::get(HalfTy, 8),
+ llvm::FixedVectorType::get(Int8Ty, 16)},
+ Ops, E, "fmmla");
+ case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
+ return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
+ {llvm::FixedVectorType::get(FloatTy, 4),
+ llvm::FixedVectorType::get(Int8Ty, 16)},
+ Ops, E, "fmmla");
case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
ExtractLow = true;
[[fallthrough]];
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index dd14fcd72922..9bbb33cb1450 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -876,27 +876,28 @@ template <> struct MappingTraits<FormatStyle::TrailingCommentsAlignmentStyle> {
FormatStyle::TrailingCommentsAlignmentStyle &Value) {
IO.enumCase(Value, "Leave",
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Leave, 0}));
+ {FormatStyle::TCAS_Leave, 0, true}));
IO.enumCase(Value, "Always",
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Always, 0}));
+ {FormatStyle::TCAS_Always, 0, true}));
IO.enumCase(Value, "Never",
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Never, 0}));
+ {FormatStyle::TCAS_Never, 0, true}));
// For backwards compatibility
IO.enumCase(Value, "true",
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Always, 0}));
+ {FormatStyle::TCAS_Always, 0, true}));
IO.enumCase(Value, "false",
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Never, 0}));
+ {FormatStyle::TCAS_Never, 0, true}));
}
static void mapping(IO &IO,
FormatStyle::TrailingCommentsAlignmentStyle &Value) {
+ IO.mapOptional("AlignPPAndNotPP", Value.AlignPPAndNotPP);
IO.mapOptional("Kind", Value.Kind);
IO.mapOptional("OverEmptyLines", Value.OverEmptyLines);
}
@@ -1651,6 +1652,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.AlignTrailingComments = {};
LLVMStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Always;
LLVMStyle.AlignTrailingComments.OverEmptyLines = 0;
+ LLVMStyle.AlignTrailingComments.AlignPPAndNotPP = true;
LLVMStyle.AllowAllArgumentsOnNextLine = true;
LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
LLVMStyle.AllowBreakBeforeNoexceptSpecifier = FormatStyle::BBNSS_Never;
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 406c77cb3ae8..fece3843c647 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -1007,9 +1007,13 @@ void WhitespaceManager::alignTrailingComments() {
return;
const int Size = Changes.size();
+ if (Size == 0)
+ return;
+
int MinColumn = 0;
int StartOfSequence = 0;
bool BreakBeforeNext = false;
+ bool IsInPP = Changes.front().Tok->Tok.is(tok::hash);
int NewLineThreshold = 1;
if (Style.AlignTrailingComments.Kind == FormatStyle::TCAS_Always)
NewLineThreshold = Style.AlignTrailingComments.OverEmptyLines + 1;
@@ -1018,7 +1022,19 @@ void WhitespaceManager::alignTrailingComments() {
auto &C = Changes[I];
if (C.StartOfBlockComment)
continue;
- Newlines += C.NewlinesBefore;
+ if (C.NewlinesBefore != 0) {
+ Newlines += C.NewlinesBefore;
+ const bool WasInPP = std::exchange(
+ IsInPP, C.Tok->Tok.is(tok::hash) || (IsInPP && C.IsTrailingComment) ||
+ C.ContinuesPPDirective);
+ if (IsInPP != WasInPP && !Style.AlignTrailingComments.AlignPPAndNotPP) {
+ alignTrailingComments(StartOfSequence, I, MinColumn);
+ MinColumn = 0;
+ MaxColumn = INT_MAX;
+ StartOfSequence = I;
+ Newlines = 0;
+ }
+ }
if (!C.IsTrailingComment)
continue;
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 6b09f7f9fc1e..8034ce9c3f22 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1058,7 +1058,9 @@ void CompilerInstance::printDiagnosticStats() {
if (!getLangOpts().CUDAIsDevice) {
OS << " when compiling for host";
} else {
- OS << " when compiling for " << getTargetOpts().CPU;
+ OS << " when compiling for "
+ << (!getTargetOpts().CPU.empty() ? getTargetOpts().CPU
+ : getTarget().getTriple().str());
}
}
OS << ".\n";
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index aab1f2b61ab8..203b60007884 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -92,69 +92,65 @@ _kxor_mask64(__mmask64 __A, __mmask64 __B) {
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fef1a2d64d53..29156e7e9643 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -59,55 +59,49 @@ _kxor_mask8(__mmask8 __A, __mmask8 __B) {
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) {
return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) {
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 5fc0afa49ce4..997e9608e112 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8068,31 +8068,27 @@ _mm512_kor(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_kortestc (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_kortestz (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) {
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) {
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 208776eb7840..2e2703de18cb 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1074,78 +1074,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
float4 f16tof32(uint4);
//===----------------------------------------------------------------------===//
-// firstbithigh builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbithigh(T Val)
-/// \brief Returns the location of the first set bit starting from the highest
-/// order bit and working downward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint64_t4);
-
-//===----------------------------------------------------------------------===//
// firstbitlow builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index c877234479ad..3d8fe7ea701a 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,18 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
return exp2(Exp) * X;
}
+template <typename K, typename T, int BitWidth>
+constexpr K firstbithigh_impl(T X) {
+ K FBH = __builtin_hlsl_elementwise_firstbithigh(X);
+#if defined(__DIRECTX__)
+ // The firstbithigh DXIL ops count bits from the wrong side, so we need to
+ // invert it for DirectX.
+ K Inversion = (BitWidth - 1) - FBH;
+ FBH = select(FBH == -1, FBH, Inversion);
+#endif
+ return FBH;
+}
+
} // namespace __detail
} // namespace hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5ba5bfb9abde..33ed14328ee8 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -262,6 +262,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N,
}
//===----------------------------------------------------------------------===//
+// firstbithigh builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbithigh(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+
+template <typename T>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ uint> firstbithigh(T X) {
+ return __detail::firstbithigh_impl<uint, T, 16>(X);
+}
+
+template <typename T, int N>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const
+ inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ vector<uint, N>> firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 16>(X);
+}
+
+#endif
+
+template <typename T>
+const inline __detail::enable_if_t<
+ __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<uint, T, 32>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int, T>::value ||
+ __detail::is_same<uint, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 32>(X);
+}
+
+template <typename T>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<uint, T, 64>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 64>(X);
+}
+
+//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/llvm_libc_wrappers/stdlib.h b/clang/lib/Headers/llvm_libc_wrappers/stdlib.h
index 1da22abd0bc4..d79e7fa041ad 100644
--- a/clang/lib/Headers/llvm_libc_wrappers/stdlib.h
+++ b/clang/lib/Headers/llvm_libc_wrappers/stdlib.h
@@ -34,13 +34,13 @@ _Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");
_Static_assert(__builtin_offsetof(ldiv_t, quot) == 0, "ABI mismatch!");
_Static_assert(__builtin_offsetof(lldiv_t, quot) == 0, "ABI mismatch!");
-#if defined(__GLIBC__) && __cplusplus >= 201703L
+#if defined(__GLIBC__) && __cplusplus >= 201103L
#define at_quick_exit atexit
#endif
#include <llvm-libc-decls/stdlib.h>
-#if defined(__GLIBC__) && __cplusplus >= 201703L
+#if defined(__GLIBC__) && __cplusplus >= 201103L
#undef at_quick_exit
#endif
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index cde354c9cd8d..76338065d223 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -394,36 +394,48 @@ Interpreter::outOfProcessJITBuilder(JITConfig Config) {
llvm::Expected<std::string>
Interpreter::getOrcRuntimePath(const driver::ToolChain &TC) {
- std::optional<std::string> CompilerRTPath = TC.getCompilerRTPath();
- std::optional<std::string> ResourceDir = TC.getRuntimePath();
+ const std::array<const char *, 3> OrcRTLibNames = {
+ "liborc_rt.a", "liborc_rt_osx.a", "liborc_rt-x86_64.a"};
+
+ auto findInDir = [&](llvm::StringRef Base) -> std::optional<std::string> {
+ for (const char *LibName : OrcRTLibNames) {
+ llvm::SmallString<256> CandidatePath(Base);
+ llvm::sys::path::append(CandidatePath, LibName);
+ if (llvm::sys::fs::exists(CandidatePath))
+ return std::string(CandidatePath.str());
+ }
+ return std::nullopt;
+ };
+
+ std::string SearchedPaths;
- if (!CompilerRTPath) {
+ if (std::optional<std::string> CompilerRTPath = TC.getCompilerRTPath()) {
+ if (auto Found = findInDir(*CompilerRTPath))
+ return *Found;
+ SearchedPaths += *CompilerRTPath;
+ } else {
return llvm::make_error<llvm::StringError>("CompilerRT path not found",
std::error_code());
}
- const std::array<const char *, 3> OrcRTLibNames = {
- "liborc_rt.a", "liborc_rt_osx.a", "liborc_rt-x86_64.a"};
-
- for (const char *LibName : OrcRTLibNames) {
- llvm::SmallString<256> CandidatePath((*CompilerRTPath).c_str());
- llvm::sys::path::append(CandidatePath, LibName);
-
- if (llvm::sys::fs::exists(CandidatePath)) {
- return CandidatePath.str().str();
- }
+ if (std::optional<std::string> ResourceDir = TC.getRuntimePath()) {
+ if (auto Found = findInDir(*ResourceDir))
+ return *Found;
+ if (!SearchedPaths.empty())
+ SearchedPaths += "; ";
+ SearchedPaths += *ResourceDir;
+ } else {
+ return llvm::make_error<llvm::StringError>("ResourceDir path not found",
+ std::error_code());
}
return llvm::make_error<llvm::StringError>(
- llvm::Twine("OrcRuntime library not found in: ") + (*CompilerRTPath),
+ llvm::Twine("OrcRuntime library not found in: ") + SearchedPaths,
std::error_code());
}
llvm::Expected<std::unique_ptr<Interpreter>>
Interpreter::create(std::unique_ptr<CompilerInstance> CI, JITConfig Config) {
- llvm::Error Err = llvm::Error::success();
-
- std::unique_ptr<llvm::orc::LLJITBuilder> JB;
if (Config.IsOutOfProcess) {
const TargetInfo &TI = CI->getTarget();
@@ -453,6 +465,9 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI, JITConfig Config) {
}
}
+ llvm::Error Err = llvm::Error::success();
+ std::unique_ptr<llvm::orc::LLJITBuilder> JB;
+
auto Interp = std::unique_ptr<Interpreter>(new Interpreter(
std::move(CI), Err, std::move(JB), /*Consumer=*/nullptr, Config));
if (auto E = std::move(Err))
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 74f87a8cb63c..7a5d28caf852 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -772,9 +772,11 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
// Produce a diagnostic if we're not tentatively parsing; otherwise track
// that our parse has failed.
- auto Invalid = [&](llvm::function_ref<void()> Action) {
+ auto Result = [&](llvm::function_ref<void()> Action,
+ LambdaIntroducerTentativeParse State =
+ LambdaIntroducerTentativeParse::Invalid) {
if (Tentative) {
- *Tentative = LambdaIntroducerTentativeParse::Invalid;
+ *Tentative = State;
return false;
}
Action();
@@ -824,7 +826,7 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
break;
}
- return Invalid([&] {
+ return Result([&] {
Diag(Tok.getLocation(), diag::err_expected_comma_or_rsquare);
});
}
@@ -861,7 +863,7 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
ConsumeToken();
Kind = LCK_StarThis;
} else {
- return Invalid([&] {
+ return Result([&] {
Diag(Tok.getLocation(), diag::err_expected_star_this_capture);
});
}
@@ -875,8 +877,9 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
// or the start of a capture (in the "&" case) with the rest of the
// capture missing. Both are an error but a misplaced capture-default
// is more likely if we don't already have a capture default.
- return Invalid(
- [&] { Diag(Tok.getLocation(), diag::err_capture_default_first); });
+ return Result(
+ [&] { Diag(Tok.getLocation(), diag::err_capture_default_first); },
+ LambdaIntroducerTentativeParse::Incomplete);
} else {
TryConsumeToken(tok::ellipsis, EllipsisLocs[0]);
@@ -899,14 +902,13 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
Id = Tok.getIdentifierInfo();
Loc = ConsumeToken();
} else if (Tok.is(tok::kw_this)) {
- return Invalid([&] {
+ return Result([&] {
// FIXME: Suggest a fixit here.
Diag(Tok.getLocation(), diag::err_this_captured_by_reference);
});
} else {
- return Invalid([&] {
- Diag(Tok.getLocation(), diag::err_expected_capture);
- });
+ return Result(
+ [&] { Diag(Tok.getLocation(), diag::err_expected_capture); });
}
TryConsumeToken(tok::ellipsis, EllipsisLocs[2]);
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index fb45db113934..7e73d89c2a18 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -1079,16 +1079,10 @@ bool Parser::ConsumeNullStmt(StmtVector &Stmts) {
StmtResult Parser::handleExprStmt(ExprResult E, ParsedStmtContext StmtCtx) {
bool IsStmtExprResult = false;
if ((StmtCtx & ParsedStmtContext::InStmtExpr) != ParsedStmtContext()) {
- // For GCC compatibility we skip past NullStmts.
- unsigned LookAhead = 0;
- while (GetLookAheadToken(LookAhead).is(tok::semi)) {
- ++LookAhead;
- }
- // Then look to see if the next two tokens close the statement expression;
- // if so, this expression statement is the last statement in a statement
- // expression.
- IsStmtExprResult = GetLookAheadToken(LookAhead).is(tok::r_brace) &&
- GetLookAheadToken(LookAhead + 1).is(tok::r_paren);
+ // Look ahead to see if the next two tokens close the statement expression;
+ // if so, this expression statement is the last statement in a
+ // statment expression.
+ IsStmtExprResult = Tok.is(tok::r_brace) && NextToken().is(tok::r_paren);
}
if (IsStmtExprResult)
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 23bf7f217a01..46addea232b0 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -321,9 +321,8 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
static_cast<unsigned>(ComparisonCategoryType::Last) + 1),
StdSourceLocationImplDecl(nullptr), CXXTypeInfoDecl(nullptr),
GlobalNewDeleteDeclared(false), DisableTypoCorrection(false),
- TyposCorrected(0), IsBuildingRecoveryCallExpr(false), NumSFINAEErrors(0),
- AccessCheckingSFINAE(false), CurrentInstantiationScope(nullptr),
- InNonInstantiationSFINAEContext(false), NonInstantiationEntries(0),
+ TyposCorrected(0), IsBuildingRecoveryCallExpr(false),
+ CurrentInstantiationScope(nullptr), NonInstantiationEntries(0),
ArgPackSubstIndex(std::nullopt), SatisfactionCache(Context) {
assert(pp.TUKind == TUKind);
TUScope = nullptr;
@@ -670,7 +669,9 @@ void Sema::addExternalSource(IntrusiveRefCntPtr<ExternalSemaSource> E) {
void Sema::PrintStats() const {
llvm::errs() << "\n*** Semantic Analysis Stats:\n";
- llvm::errs() << NumSFINAEErrors << " SFINAE diagnostics trapped.\n";
+ if (SFINAETrap *Trap = getSFINAEContext())
+ llvm::errs() << int(Trap->hasErrorOccurred())
+ << " SFINAE diagnostics trapped.\n";
BumpAlloc.PrintStats();
AnalysisWarnings.PrintStats();
@@ -1681,7 +1682,8 @@ void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) {
// issue I am not seeing yet), then there should at least be a clarifying
// comment somewhere.
Diagnostic DiagInfo(&Diags, DB);
- if (std::optional<TemplateDeductionInfo *> Info = isSFINAEContext()) {
+ if (SFINAETrap *Trap = getSFINAEContext()) {
+ sema::TemplateDeductionInfo *Info = Trap->getDeductionInfo();
switch (DiagnosticIDs::getDiagnosticSFINAEResponse(DiagInfo.getID())) {
case DiagnosticIDs::SFINAE_Report:
// We'll report the diagnostic below.
@@ -1690,37 +1692,37 @@ void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) {
case DiagnosticIDs::SFINAE_SubstitutionFailure:
// Count this failure so that we know that template argument deduction
// has failed.
- ++NumSFINAEErrors;
+ Trap->setErrorOccurred();
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information.
- if (*Info && !(*Info)->hasSFINAEDiagnostic()) {
- (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(),
- PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
- }
+ if (Info && !Info->hasSFINAEDiagnostic())
+ Info->addSFINAEDiagnostic(
+ DiagInfo.getLocation(),
+ PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
Diags.setLastDiagnosticIgnored(true);
return;
case DiagnosticIDs::SFINAE_AccessControl: {
// Per C++ Core Issue 1170, access control is part of SFINAE.
- // Additionally, the AccessCheckingSFINAE flag can be used to temporarily
+ // Additionally, the WithAccessChecking flag can be used to temporarily
// make access control a part of SFINAE for the purposes of checking
// type traits.
- if (!AccessCheckingSFINAE && !getLangOpts().CPlusPlus11)
+ if (!Trap->withAccessChecking() && !getLangOpts().CPlusPlus11)
break;
SourceLocation Loc = DiagInfo.getLocation();
// Suppress this diagnostic.
- ++NumSFINAEErrors;
+ Trap->setErrorOccurred();
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information.
- if (*Info && !(*Info)->hasSFINAEDiagnostic()) {
- (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(),
- PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
- }
+ if (Info && !Info->hasSFINAEDiagnostic())
+ Info->addSFINAEDiagnostic(
+ DiagInfo.getLocation(),
+ PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
Diags.setLastDiagnosticIgnored(true);
@@ -1740,13 +1742,13 @@ void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) {
return;
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information;
- if (*Info) {
- (*Info)->addSuppressedDiagnostic(
+ if (Info) {
+ Info->addSuppressedDiagnostic(
DiagInfo.getLocation(),
PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
if (!Diags.getDiagnosticIDs()->isNote(DiagID))
PrintContextStack([Info](SourceLocation Loc, PartialDiagnostic PD) {
- (*Info)->addSuppressedDiagnostic(Loc, std::move(PD));
+ Info->addSuppressedDiagnostic(Loc, std::move(PD));
});
}
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 139c4abc040d..cece22092bb1 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -558,6 +558,8 @@ AMDGPUMaxNumWorkGroupsAttr *SemaAMDGPU::CreateAMDGPUMaxNumWorkGroupsAttr(
const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
ASTContext &Context = getASTContext();
AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr);
+ assert(!SemaRef.isSFINAEContext() &&
+ "Can't produce SFINAE diagnostic pointing to temporary attribute");
if (checkAMDGPUMaxNumWorkGroupsArguments(SemaRef, XExpr, YExpr, ZExpr,
TmpAttr))
diff --git a/clang/lib/Sema/SemaBoundsSafety.cpp b/clang/lib/Sema/SemaBoundsSafety.cpp
index 39ab13653f5f..de9adf8ef5a1 100644
--- a/clang/lib/Sema/SemaBoundsSafety.cpp
+++ b/clang/lib/Sema/SemaBoundsSafety.cpp
@@ -132,9 +132,23 @@ bool Sema::CheckCountedByAttrOnField(FieldDecl *FD, Expr *E, bool CountInBytes,
// `BoundsSafetyCheckUseOfCountAttrPtr`
//
// * When the pointee type is always an incomplete type (e.g.
- // `void`) the attribute is disallowed by this method because we know the
- // type can never be completed so there's no reason to allow it.
- InvalidTypeKind = CountedByInvalidPointeeTypeKind::INCOMPLETE;
+ // `void` in strict C mode) the attribute is disallowed by this method
+ // because we know the type can never be completed so there's no reason
+ // to allow it.
+ //
+ // Exception: void has an implicit size of 1 byte for pointer arithmetic
+ // (following GNU convention). Therefore, counted_by on void* is allowed
+ // and behaves equivalently to sized_by (treating the count as bytes).
+ bool IsVoidPtr = PointeeTy->isVoidType();
+ if (IsVoidPtr) {
+ // Emit a warning that this is a GNU extension.
+ Diag(FD->getBeginLoc(), diag::ext_gnu_counted_by_void_ptr) << Kind;
+ Diag(FD->getBeginLoc(), diag::note_gnu_counted_by_void_ptr_use_sized_by)
+ << Kind;
+ assert(InvalidTypeKind == CountedByInvalidPointeeTypeKind::VALID);
+ } else {
+ InvalidTypeKind = CountedByInvalidPointeeTypeKind::INCOMPLETE;
+ }
} else if (PointeeTy->isSizelessType()) {
InvalidTypeKind = CountedByInvalidPointeeTypeKind::SIZELESS;
} else if (PointeeTy->isFunctionType()) {
@@ -272,6 +286,9 @@ GetCountedByAttrOnIncompletePointee(QualType Ty, NamedDecl **ND) {
if (!PointeeTy->isIncompleteType(ND))
return {};
+ if (PointeeTy->isVoidType())
+ return {};
+
return {CATy, PointeeTy};
}
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index fb4d0b458268..883e3410a35e 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -526,12 +526,12 @@ ExprResult ConstraintSatisfactionChecker::EvaluateAtomicConstraint(
S, AtomicExpr->getBeginLoc(),
Sema::InstantiatingTemplate::ConstraintSubstitution{},
// FIXME: improve const-correctness of InstantiatingTemplate
- const_cast<NamedDecl *>(Template), Info, AtomicExpr->getSourceRange());
+ const_cast<NamedDecl *>(Template), AtomicExpr->getSourceRange());
if (Inst.isInvalid())
return ExprError();
// We do not want error diagnostics escaping here.
- Sema::SFINAETrap Trap(S);
+ Sema::SFINAETrap Trap(S, Info);
SubstitutedExpression =
S.SubstConstraintExpr(const_cast<Expr *>(AtomicExpr), MLTAL);
@@ -599,16 +599,15 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments(
return MultiLevelTemplateArgumentList();
TemplateDeductionInfo Info(Constraint.getBeginLoc());
+ Sema::SFINAETrap Trap(S, Info);
Sema::InstantiatingTemplate Inst(
S, Constraint.getBeginLoc(),
Sema::InstantiatingTemplate::ConstraintSubstitution{},
// FIXME: improve const-correctness of InstantiatingTemplate
- const_cast<NamedDecl *>(Template), Info, Constraint.getSourceRange());
+ const_cast<NamedDecl *>(Template), Constraint.getSourceRange());
if (Inst.isInvalid())
return std::nullopt;
- Sema::SFINAETrap Trap(S);
-
TemplateArgumentListInfo SubstArgs;
Sema::ArgPackSubstIndexRAII SubstIndex(
S, Constraint.getPackSubstitutionIndex()
@@ -778,9 +777,6 @@ ConstraintSatisfactionChecker::EvaluateFoldExpandedConstraintSize(
const FoldExpandedConstraint &FE,
const MultiLevelTemplateArgumentList &MLTAL) {
- // We should ignore errors in the presence of packs of different size.
- Sema::SFINAETrap Trap(S);
-
Expr *Pattern = const_cast<Expr *>(FE.getPattern());
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
@@ -792,18 +788,12 @@ ConstraintSatisfactionChecker::EvaluateFoldExpandedConstraintSize(
if (S.CheckParameterPacksForExpansion(
Pattern->getExprLoc(), Pattern->getSourceRange(), Unexpanded, MLTAL,
/*FailOnPackProducingTemplates=*/false, Expand, RetainExpansion,
- NumExpansions) ||
+ NumExpansions, /*Diagnose=*/false) ||
!Expand || RetainExpansion)
return std::nullopt;
- if (NumExpansions && S.getLangOpts().BracketDepth < *NumExpansions) {
- S.Diag(Pattern->getExprLoc(),
- clang::diag::err_fold_expression_limit_exceeded)
- << *NumExpansions << S.getLangOpts().BracketDepth
- << Pattern->getSourceRange();
- S.Diag(Pattern->getExprLoc(), diag::note_bracket_depth);
+ if (NumExpansions && S.getLangOpts().BracketDepth < *NumExpansions)
return std::nullopt;
- }
return NumExpansions;
}
@@ -921,7 +911,6 @@ ExprResult ConstraintSatisfactionChecker::EvaluateSlow(
return ExprError();
}
- Sema::SFINAETrap Trap(S);
Sema::ArgPackSubstIndexRAII SubstIndex(
S, Constraint.getPackSubstitutionIndex()
? Constraint.getPackSubstitutionIndex()
@@ -930,9 +919,10 @@ ExprResult ConstraintSatisfactionChecker::EvaluateSlow(
const ASTTemplateArgumentListInfo *Ori =
ConceptId->getTemplateArgsAsWritten();
TemplateDeductionInfo Info(TemplateNameLoc);
- Sema::InstantiatingTemplate _(
+ Sema::SFINAETrap Trap(S, Info);
+ Sema::InstantiatingTemplate _2(
S, TemplateNameLoc, Sema::InstantiatingTemplate::ConstraintSubstitution{},
- const_cast<NamedDecl *>(Template), Info, Constraint.getSourceRange());
+ const_cast<NamedDecl *>(Template), Constraint.getSourceRange());
TemplateArgumentListInfo OutArgs(Ori->LAngleLoc, Ori->RAngleLoc);
if (S.SubstTemplateArguments(Ori->arguments(), *SubstitutedArgs, OutArgs) ||
@@ -1142,13 +1132,21 @@ static bool CheckConstraintSatisfaction(
if (TemplateArgsLists.getNumLevels() != 0)
Args = TemplateArgsLists.getInnermost();
- std::optional<Sema::InstantiatingTemplate> SynthesisContext;
- if (!TopLevelConceptId) {
- SynthesisContext.emplace(S, TemplateIDRange.getBegin(),
- Sema::InstantiatingTemplate::ConstraintsCheck{},
- const_cast<NamedDecl *>(Template), Args,
+ struct SynthesisContextPair {
+ Sema::InstantiatingTemplate Inst;
+ Sema::NonSFINAEContext NSC;
+ SynthesisContextPair(Sema &S, NamedDecl *Template,
+ ArrayRef<TemplateArgument> TemplateArgs,
+ SourceRange InstantiationRange)
+ : Inst(S, InstantiationRange.getBegin(),
+ Sema::InstantiatingTemplate::ConstraintsCheck{}, Template,
+ TemplateArgs, InstantiationRange),
+ NSC(S) {}
+ };
+ std::optional<SynthesisContextPair> SynthesisContext;
+ if (!TopLevelConceptId)
+ SynthesisContext.emplace(S, const_cast<NamedDecl *>(Template), Args,
TemplateIDRange);
- }
const NormalizedConstraint *C =
S.getNormalizedAssociatedConstraints(Template, AssociatedConstraints);
@@ -1478,8 +1476,7 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
if (MLTAL.getNumSubstitutedLevels() == 0)
return ConstrExpr;
- Sema::SFINAETrap SFINAE(S);
-
+ Sema::NonSFINAEContext _(S);
Sema::InstantiatingTemplate Inst(
S, DeclInfo.getLocation(),
Sema::InstantiatingTemplate::ConstraintNormalization{},
@@ -1554,7 +1551,7 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
Sema::ReuseLambdaContextDecl);
ExprResult SubstConstr = S.SubstConstraintExprWithoutSatisfaction(
const_cast<clang::Expr *>(ConstrExpr), MLTAL);
- if (SFINAE.hasErrorOccurred() || !SubstConstr.isUsable())
+ if (!SubstConstr.isUsable())
return nullptr;
return SubstConstr.get();
}
@@ -2104,6 +2101,7 @@ bool SubstituteParameterMappings::substitute(
InstLocBegin = SR.getBegin();
InstLocEnd = SR.getEnd();
}
+ Sema::NonSFINAEContext _(SemaRef);
Sema::InstantiatingTemplate Inst(
SemaRef, InstLocBegin,
Sema::InstantiatingTemplate::ParameterMappingSubstitution{},
@@ -2171,6 +2169,7 @@ bool SubstituteParameterMappings::substitute(ConceptIdConstraint &CC) {
InstLocBegin = SR.getBegin();
InstLocEnd = SR.getEnd();
}
+ Sema::NonSFINAEContext _(SemaRef);
// This is useful for name lookup across modules; see Sema::getLookupModules.
Sema::InstantiatingTemplate Inst(
SemaRef, InstLocBegin,
@@ -2311,6 +2310,7 @@ NormalizedConstraint *NormalizedConstraint::fromConstraintExpr(
} else if (auto *CSE = dyn_cast<const ConceptSpecializationExpr>(E)) {
NormalizedConstraint *SubNF;
{
+ Sema::NonSFINAEContext _(S);
Sema::InstantiatingTemplate Inst(
S, CSE->getExprLoc(),
Sema::InstantiatingTemplate::ConstraintNormalization{},
@@ -2546,8 +2546,6 @@ bool Sema::MaybeEmitAmbiguousAtomicConstraintsDiagnostic(
};
{
- // The subsumption checks might cause diagnostics
- SFINAETrap Trap(*this);
auto *Normalized1 = getNormalizedAssociatedConstraints(D1, AC1);
if (!Normalized1)
return false;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index fc3aabf5741c..086dd8ba1c67 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8492,12 +8492,11 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
DeclContext *NewDC = D->getDeclContext();
if (FieldDecl *FD = dyn_cast<FieldDecl>(ShadowedDecl)) {
- if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewDC)) {
- // Fields are not shadowed by variables in C++ static methods.
- if (MD->isStatic())
- return;
-
- if (!MD->getParent()->isLambda() && MD->isExplicitObjectMemberFunction())
+ if (const auto *MD =
+ dyn_cast<CXXMethodDecl>(getFunctionLevelDeclContext())) {
+ // Fields aren't shadowed in C++ static members or in member functions
+ // with an explicit object parameter.
+ if (MD->isStatic() || MD->isExplicitObjectMemberFunction())
return;
}
// Fields shadowed by constructor parameters are a special case. Usually
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index a50c27610dc9..2159a0dc2a5d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -12653,10 +12653,10 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
// This is a gcc extension compatibility comparison.
// In a SFINAE context, we treat this as a hard error to maintain
// conformance with the C++ standard.
- diagnoseFunctionPointerToVoidComparison(
- *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext());
+ bool IsError = isSFINAEContext();
+ diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, IsError);
- if (isSFINAEContext())
+ if (IsError)
return QualType();
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
@@ -14598,11 +14598,11 @@ QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) {
unsigned AddressOfError = AO_No_Error;
if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) {
- bool sfinae = (bool)isSFINAEContext();
- Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary
- : diag::ext_typecheck_addrof_temporary)
- << op->getType() << op->getSourceRange();
- if (sfinae)
+ bool IsError = isSFINAEContext();
+ Diag(OpLoc, IsError ? diag::err_typecheck_addrof_temporary
+ : diag::ext_typecheck_addrof_temporary)
+ << op->getType() << op->getSourceRange();
+ if (IsError)
return QualType();
// Materialize the temporary as an lvalue so that we can take its address.
OrigOp = op =
@@ -16185,9 +16185,7 @@ ExprResult Sema::BuildStmtExpr(SourceLocation LPLoc, Stmt *SubStmt,
QualType Ty = Context.VoidTy;
bool StmtExprMayBindToTemp = false;
if (!Compound->body_empty()) {
- // For GCC compatibility we get the last Stmt excluding trailing NullStmts.
- if (const auto *LastStmt =
- dyn_cast<ValueStmt>(Compound->getStmtExprResult())) {
+ if (const auto *LastStmt = dyn_cast<ValueStmt>(Compound->body_back())) {
if (const Expr *Value = LastStmt->getExprStmt()) {
StmtExprMayBindToTemp = true;
Ty = Value->getType();
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 983a7842ef45..4a9e1bc93b91 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -3846,13 +3846,14 @@ QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
// within enable_if in a SFINAE context, dig out the specific
// enable_if condition that failed and present that instead.
if (isEnableIfAliasTemplate(AliasTemplate)) {
- if (auto DeductionInfo = isSFINAEContext()) {
- if (*DeductionInfo &&
- (*DeductionInfo)->hasSFINAEDiagnostic() &&
- (*DeductionInfo)->peekSFINAEDiagnostic().second.getDiagID() ==
- diag::err_typename_nested_not_found_enable_if &&
- TemplateArgs[0].getArgument().getKind()
- == TemplateArgument::Expression) {
+ if (SFINAETrap *Trap = getSFINAEContext();
+ TemplateDeductionInfo *DeductionInfo =
+ Trap ? Trap->getDeductionInfo() : nullptr) {
+ if (DeductionInfo->hasSFINAEDiagnostic() &&
+ DeductionInfo->peekSFINAEDiagnostic().second.getDiagID() ==
+ diag::err_typename_nested_not_found_enable_if &&
+ TemplateArgs[0].getArgument().getKind() ==
+ TemplateArgument::Expression) {
Expr *FailedCond;
std::string FailedDescription;
std::tie(FailedCond, FailedDescription) =
@@ -3861,15 +3862,14 @@ QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
// Remove the old SFINAE diagnostic.
PartialDiagnosticAt OldDiag =
{SourceLocation(), PartialDiagnostic::NullDiagnostic()};
- (*DeductionInfo)->takeSFINAEDiagnostic(OldDiag);
+ DeductionInfo->takeSFINAEDiagnostic(OldDiag);
// Add a new SFINAE diagnostic specifying which condition
// failed.
- (*DeductionInfo)->addSFINAEDiagnostic(
- OldDiag.first,
- PDiag(diag::err_typename_nested_not_found_requirement)
- << FailedDescription
- << FailedCond->getSourceRange());
+ DeductionInfo->addSFINAEDiagnostic(
+ OldDiag.first,
+ PDiag(diag::err_typename_nested_not_found_requirement)
+ << FailedDescription << FailedCond->getSourceRange());
}
}
}
@@ -3955,6 +3955,7 @@ QualType Sema::CheckTemplateIdType(ElaboratedTypeKeyword Keyword,
if (Decl->getSpecializationKind() == TSK_Undeclared &&
ClassTemplate->getTemplatedDecl()->hasAttrs()) {
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, TemplateLoc, Decl);
if (!Inst.isInvalid()) {
MultiLevelTemplateArgumentList TemplateArgLists(Template,
@@ -5565,12 +5566,11 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc,
auto checkExpr = [&](Expr *E) -> Expr * {
TemplateArgument SugaredResult, CanonicalResult;
- unsigned CurSFINAEErrors = NumSFINAEErrors;
ExprResult Res = CheckTemplateArgument(
NTTP, NTTPType, E, SugaredResult, CanonicalResult,
/*StrictCheck=*/CTAI.MatchingTTP || CTAI.PartialOrdering, CTAK);
// If the current template argument causes an error, give up now.
- if (Res.isInvalid() || CurSFINAEErrors < NumSFINAEErrors)
+ if (Res.isInvalid())
return nullptr;
CTAI.SugaredConverted.push_back(SugaredResult);
CTAI.CanonicalConverted.push_back(CanonicalResult);
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 6964242b39d6..a287319cc4f8 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3239,10 +3239,6 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
ArrayRef<TemplateArgumentLoc> Ps, ArrayRef<TemplateArgument> As,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
TemplateDeductionInfo &Info, bool CopyDeducedArgs) {
- // Unevaluated SFINAE context.
- EnterExpressionEvaluationContext Unevaluated(
- S, Sema::ExpressionEvaluationContext::Unevaluated);
-
Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(Entity));
// C++ [temp.deduct.type]p2:
@@ -3380,10 +3376,6 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
Sema &S, TemplateDecl *TD,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
TemplateDeductionInfo &Info) {
- // Unevaluated SFINAE context.
- EnterExpressionEvaluationContext Unevaluated(
- S, Sema::ExpressionEvaluationContext::Unevaluated);
-
Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(TD));
// C++ [temp.deduct.type]p2:
@@ -3423,7 +3415,7 @@ DeduceTemplateArguments(Sema &S, T *Partial,
// Unevaluated SFINAE context.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::Unevaluated);
- Sema::SFINAETrap Trap(S);
+ Sema::SFINAETrap Trap(S, Info);
// This deduction has no relation to any outer instantiation we might be
// performing.
@@ -3441,8 +3433,7 @@ DeduceTemplateArguments(Sema &S, T *Partial,
return Result;
SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
- Sema::InstantiatingTemplate Inst(S, Info.getLocation(), Partial, DeducedArgs,
- Info);
+ Sema::InstantiatingTemplate Inst(S, Info.getLocation(), Partial, DeducedArgs);
if (Inst.isInvalid())
return TemplateDeductionResult::InstantiationDepth;
@@ -3497,7 +3488,7 @@ Sema::DeduceTemplateArgumentsFromType(TemplateDecl *TD, QualType FromType,
// Unevaluated SFINAE context.
EnterExpressionEvaluationContext Unevaluated(
*this, Sema::ExpressionEvaluationContext::Unevaluated);
- SFINAETrap Trap(*this);
+ SFINAETrap Trap(*this, Info);
// This deduction has no relation to any outer instantiation we might be
// performing.
@@ -3514,7 +3505,7 @@ Sema::DeduceTemplateArgumentsFromType(TemplateDecl *TD, QualType FromType,
}
SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
- InstantiatingTemplate Inst(*this, Info.getLocation(), TD, DeducedArgs, Info);
+ InstantiatingTemplate Inst(*this, Info.getLocation(), TD, DeducedArgs);
if (Inst.isInvalid())
return TemplateDeductionResult::InstantiationDepth;
@@ -3558,6 +3549,9 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
SmallVectorImpl<QualType> &ParamTypes, QualType *FunctionType,
TemplateDeductionInfo &Info) {
+ assert(isSFINAEContext());
+ assert(isUnevaluatedContext());
+
FunctionDecl *Function = FunctionTemplate->getTemplatedDecl();
TemplateParameterList *TemplateParams
= FunctionTemplate->getTemplateParameters();
@@ -3573,11 +3567,6 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
return TemplateDeductionResult::Success;
}
- // Unevaluated SFINAE context.
- EnterExpressionEvaluationContext Unevaluated(
- *this, Sema::ExpressionEvaluationContext::Unevaluated);
- SFINAETrap Trap(*this);
-
// C++ [temp.arg.explicit]p3:
// Template arguments that are present shall be specified in the
// declaration order of their corresponding template-parameters. The
@@ -3590,7 +3579,7 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
SmallVector<TemplateArgument, 4> DeducedArgs;
InstantiatingTemplate Inst(
*this, Info.getLocation(), FunctionTemplate, DeducedArgs,
- CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info);
+ CodeSynthesisContext::ExplicitTemplateArgumentSubstitution);
if (Inst.isInvalid())
return TemplateDeductionResult::InstantiationDepth;
@@ -3598,8 +3587,7 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
if (CheckTemplateArgumentList(FunctionTemplate, SourceLocation(),
ExplicitTemplateArgs, /*DefaultArgs=*/{},
/*PartialTemplateArgs=*/true, CTAI,
- /*UpdateArgsWithConversions=*/false) ||
- Trap.hasErrorOccurred()) {
+ /*UpdateArgsWithConversions=*/false)) {
unsigned Index = CTAI.SugaredConverted.size();
if (Index >= TemplateParams->size())
return TemplateDeductionResult::SubstitutionFailure;
@@ -3688,7 +3676,7 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
ResultType =
SubstType(Proto->getReturnType(), MLTAL,
Function->getTypeSpecStartLoc(), Function->getDeclName());
- if (ResultType.isNull() || Trap.hasErrorOccurred())
+ if (ResultType.isNull())
return TemplateDeductionResult::SubstitutionFailure;
// CUDA: Kernel function must have 'void' return type.
if (getLangOpts().CUDA)
@@ -3714,7 +3702,7 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
Function->getLocation(),
Function->getDeclName(),
EPI);
- if (FunctionType->isNull() || Trap.hasErrorOccurred())
+ if (FunctionType->isNull())
return TemplateDeductionResult::SubstitutionFailure;
}
@@ -3912,12 +3900,15 @@ static TemplateDeductionResult instantiateExplicitSpecifierDeferred(
if (!ExplicitExpr->isValueDependent())
return TemplateDeductionResult::Success;
+ // By this point, FinishTemplateArgumentDeduction will have been reverted back
+ // to a regular non-SFINAE template instantiation context, so setup a new
+ // SFINAE context.
Sema::InstantiatingTemplate Inst(
S, Info.getLocation(), FunctionTemplate, DeducedArgs,
- Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution, Info);
+ Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution);
if (Inst.isInvalid())
return TemplateDeductionResult::InstantiationDepth;
- Sema::SFINAETrap Trap(S);
+ Sema::SFINAETrap Trap(S, Info);
const ExplicitSpecifier InstantiatedES =
S.instantiateExplicitSpecifier(SubstArgs, ES);
if (InstantiatedES.isInvalid() || Trap.hasErrorOccurred()) {
@@ -3937,17 +3928,12 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
bool PartialOverloading, bool PartialOrdering,
bool ForOverloadSetAddressResolution,
llvm::function_ref<bool(bool)> CheckNonDependent) {
- // Unevaluated SFINAE context.
- EnterExpressionEvaluationContext Unevaluated(
- *this, Sema::ExpressionEvaluationContext::Unevaluated);
- SFINAETrap Trap(*this);
-
// Enter a new template instantiation context while we instantiate the
// actual function declaration.
SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
InstantiatingTemplate Inst(
*this, Info.getLocation(), FunctionTemplate, DeducedArgs,
- CodeSynthesisContext::DeducedTemplateArgumentSubstitution, Info);
+ CodeSynthesisContext::DeducedTemplateArgumentSubstitution);
if (Inst.isInvalid())
return TemplateDeductionResult::InstantiationDepth;
@@ -4030,18 +4016,9 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
// If the template argument list is owned by the function template
// specialization, release it.
if (Specialization->getTemplateSpecializationArgs() ==
- CanonicalDeducedArgumentList &&
- !Trap.hasErrorOccurred())
+ CanonicalDeducedArgumentList)
Info.takeCanonical();
- // There may have been an error that did not prevent us from constructing a
- // declaration. Mark the declaration invalid and return with a substitution
- // failure.
- if (Trap.hasErrorOccurred()) {
- Specialization->setInvalidDecl(true);
- return TemplateDeductionResult::SubstitutionFailure;
- }
-
// C++2a [temp.deduct]p5
// [...] When all template arguments have been deduced [...] all uses of
// template parameters [...] are replaced with the corresponding deduced
@@ -4553,6 +4530,10 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
return TemplateDeductionResult::TooManyArguments;
}
+ EnterExpressionEvaluationContext Unevaluated(
+ *this, Sema::ExpressionEvaluationContext::Unevaluated);
+ Sema::SFINAETrap Trap(*this, Info);
+
// The types of the parameters from which we will perform template argument
// deduction.
LocalInstantiationScope InstScope(*this);
@@ -4570,6 +4551,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
});
if (Result != TemplateDeductionResult::Success)
return Result;
+ if (Trap.hasErrorOccurred())
+ return TemplateDeductionResult::SubstitutionFailure;
NumExplicitlySpecified = Deduced.size();
} else {
@@ -4743,6 +4726,11 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
OnlyInitializeNonUserDefinedConversions);
});
});
+ if (Trap.hasErrorOccurred()) {
+ if (Specialization)
+ Specialization->setInvalidDecl(true);
+ return TemplateDeductionResult::SubstitutionFailure;
+ }
return Result;
}
@@ -4795,6 +4783,14 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
= FunctionTemplate->getTemplateParameters();
QualType FunctionType = Function->getType();
+ bool PotentiallyEvaluated =
+ currentEvaluationContext().isPotentiallyEvaluated();
+
+ // Unevaluated SFINAE context.
+ EnterExpressionEvaluationContext Unevaluated(
+ *this, Sema::ExpressionEvaluationContext::Unevaluated);
+ SFINAETrap Trap(*this, Info);
+
// Substitute any explicit template arguments.
LocalInstantiationScope InstScope(*this);
SmallVector<DeducedTemplateArgument, 4> Deduced;
@@ -4809,6 +4805,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
});
if (Result != TemplateDeductionResult::Success)
return Result;
+ if (Trap.hasErrorOccurred())
+ return TemplateDeductionResult::SubstitutionFailure;
NumExplicitlySpecified = Deduced.size();
}
@@ -4820,11 +4818,6 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
ArgFunctionType = adjustCCAndNoReturn(ArgFunctionType, FunctionType,
/*AdjustExceptionSpec*/false);
- // Unevaluated SFINAE context.
- std::optional<EnterExpressionEvaluationContext> Unevaluated(
- std::in_place, *this, Sema::ExpressionEvaluationContext::Unevaluated);
- SFINAETrap Trap(*this);
-
Deduced.resize(TemplateParams->size());
// If the function has a deduced return type, substitute it for a dependent
@@ -4865,14 +4858,12 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
DeduceReturnType(Specialization, Info.getLocation(), false))
return TemplateDeductionResult::MiscellaneousDeductionFailure;
- Unevaluated = std::nullopt;
// [C++26][expr.const]/p17
// An expression or conversion is immediate-escalating if it is not initially
// in an immediate function context and it is [...]
// a potentially-evaluated id-expression that denotes an immediate function.
if (IsAddressOfFunction && getLangOpts().CPlusPlus20 &&
- Specialization->isImmediateEscalating() &&
- currentEvaluationContext().isPotentiallyEvaluated() &&
+ Specialization->isImmediateEscalating() && PotentiallyEvaluated &&
CheckIfFunctionSpecializationIsImmediate(Specialization,
Info.getLocation()))
return TemplateDeductionResult::MiscellaneousDeductionFailure;
@@ -4975,7 +4966,7 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
// Unevaluated SFINAE context.
EnterExpressionEvaluationContext Unevaluated(
*this, Sema::ExpressionEvaluationContext::Unevaluated);
- SFINAETrap Trap(*this);
+ SFINAETrap Trap(*this, Info);
// C++ [temp.deduct.conv]p1:
// Template argument deduction is done by comparing the return
@@ -5614,10 +5605,6 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
Sema &S, FunctionTemplateDecl *FTD,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
TemplateDeductionInfo &Info, T &&CheckDeductionConsistency) {
- EnterExpressionEvaluationContext Unevaluated(
- S, Sema::ExpressionEvaluationContext::Unevaluated);
- Sema::SFINAETrap Trap(S);
-
Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(FTD));
// C++26 [temp.deduct.type]p2:
@@ -5645,13 +5632,7 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
// and verify that the instantiated argument is both valid
// and equivalent to the parameter.
LocalInstantiationScope InstScope(S);
-
- if (auto TDR = CheckDeductionConsistency(S, FTD, CTAI.SugaredConverted);
- TDR != TemplateDeductionResult::Success)
- return TDR;
-
- return Trap.hasErrorOccurred() ? TemplateDeductionResult::SubstitutionFailure
- : TemplateDeductionResult::Success;
+ return CheckDeductionConsistency(S, FTD, CTAI.SugaredConverted);
}
/// Determine whether the function template \p FT1 is at least as
@@ -5717,9 +5698,12 @@ static bool isAtLeastAsSpecializedAs(
}
SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
+ EnterExpressionEvaluationContext Unevaluated(
+ S, Sema::ExpressionEvaluationContext::Unevaluated);
+ Sema::SFINAETrap Trap(S, Info);
Sema::InstantiatingTemplate Inst(
S, Info.getLocation(), FT2, DeducedArgs,
- Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution, Info);
+ Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution);
if (Inst.isInvalid())
return false;
@@ -5765,7 +5749,7 @@ static bool isAtLeastAsSpecializedAs(
});
}) == TemplateDeductionResult::Success;
});
- if (!AtLeastAsSpecialized)
+ if (!AtLeastAsSpecialized || Trap.hasErrorOccurred())
return false;
// C++0x [temp.deduct.partial]p11:
@@ -6241,10 +6225,11 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
/*HasDeducedAnyParam=*/nullptr) != TemplateDeductionResult::Success)
return false;
- SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(),
- Deduced.end());
- Sema::InstantiatingTemplate Inst(S, Info.getLocation(), P2, DeducedArgs,
- Info);
+ SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
+ EnterExpressionEvaluationContext Unevaluated(
+ S, Sema::ExpressionEvaluationContext::Unevaluated);
+ Sema::SFINAETrap Trap(S, Info);
+ Sema::InstantiatingTemplate Inst(S, Info.getLocation(), P2, DeducedArgs);
if (Inst.isInvalid())
return false;
@@ -6252,8 +6237,6 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
Ps = cast<TemplateSpecializationType>(T2)->template_arguments(),
As = cast<TemplateSpecializationType>(T1)->template_arguments();
- Sema::SFINAETrap Trap(S);
-
TemplateDeductionResult Result;
S.runWithSufficientStackSpace(Info.getLocation(), [&] {
Result = ::FinishTemplateArgumentDeduction(
@@ -6261,14 +6244,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
/*IsPartialOrdering=*/true, Ps, As, Deduced, Info,
/*CopyDeducedArgs=*/false);
});
-
- if (Result != TemplateDeductionResult::Success)
- return false;
-
- if (Trap.hasErrorOccurred())
- return false;
-
- return true;
+ return Result == TemplateDeductionResult::Success && !Trap.hasErrorOccurred();
}
namespace {
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index 40811d4c42e2..bfb10665c25b 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -1025,6 +1025,7 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef,
TypeAliasTemplateDecl *AliasTemplate,
FunctionTemplateDecl *F, SourceLocation Loc) {
LocalInstantiationScope Scope(SemaRef);
+ Sema::NonSFINAEContext _1(SemaRef);
Sema::InstantiatingTemplate BuildingDeductionGuides(
SemaRef, AliasTemplate->getLocation(), F,
Sema::InstantiatingTemplate::BuildingDeductionGuidesTag{});
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 5fceacd0c00e..35205f40cbce 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -606,8 +606,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const {
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, CodeSynthesisContext::SynthesisKind Kind,
SourceLocation PointOfInstantiation, SourceRange InstantiationRange,
- Decl *Entity, NamedDecl *Template, ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo *DeductionInfo)
+ Decl *Entity, NamedDecl *Template, ArrayRef<TemplateArgument> TemplateArgs)
: SemaRef(SemaRef) {
// Don't allow further instantiation if a fatal error and an uncompilable
// error have occurred. Any diagnostics we might have raised will not be
@@ -625,7 +624,6 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Inst.Template = Template;
Inst.TemplateArgs = TemplateArgs.data();
Inst.NumTemplateArgs = TemplateArgs.size();
- Inst.DeductionInfo = DeductionInfo;
Inst.InstantiationRange = InstantiationRange;
Inst.InConstraintSubstitution =
Inst.Kind == CodeSynthesisContext::ConstraintSubstitution;
@@ -671,48 +669,40 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
FunctionTemplateDecl *FunctionTemplate,
ArrayRef<TemplateArgument> TemplateArgs,
- CodeSynthesisContext::SynthesisKind Kind,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ CodeSynthesisContext::SynthesisKind Kind, SourceRange InstantiationRange)
: InstantiatingTemplate(SemaRef, Kind, PointOfInstantiation,
InstantiationRange, FunctionTemplate, nullptr,
- TemplateArgs, &DeductionInfo) {
+ TemplateArgs) {
assert(Kind == CodeSynthesisContext::ExplicitTemplateArgumentSubstitution ||
Kind == CodeSynthesisContext::DeducedTemplateArgumentSubstitution ||
Kind == CodeSynthesisContext::BuildingDeductionGuides);
}
Sema::InstantiatingTemplate::InstantiatingTemplate(
- Sema &SemaRef, SourceLocation PointOfInstantiation,
- TemplateDecl *Template,
- ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ Sema &SemaRef, SourceLocation PointOfInstantiation, TemplateDecl *Template,
+ ArrayRef<TemplateArgument> TemplateArgs, SourceRange InstantiationRange)
: InstantiatingTemplate(
- SemaRef,
- CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
+ SemaRef, CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
PointOfInstantiation, InstantiationRange, Template, nullptr,
- TemplateArgs, &DeductionInfo) {}
+ TemplateArgs) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
ClassTemplatePartialSpecializationDecl *PartialSpec,
- ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ ArrayRef<TemplateArgument> TemplateArgs, SourceRange InstantiationRange)
: InstantiatingTemplate(
- SemaRef,
- CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
+ SemaRef, CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
PointOfInstantiation, InstantiationRange, PartialSpec, nullptr,
- TemplateArgs, &DeductionInfo) {}
+ TemplateArgs) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
VarTemplatePartialSpecializationDecl *PartialSpec,
- ArrayRef<TemplateArgument> TemplateArgs,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ ArrayRef<TemplateArgument> TemplateArgs, SourceRange InstantiationRange)
: InstantiatingTemplate(
- SemaRef,
- CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
+ SemaRef, CodeSynthesisContext::DeducedTemplateArgumentSubstitution,
PointOfInstantiation, InstantiationRange, PartialSpec, nullptr,
- TemplateArgs, &DeductionInfo) {}
+ TemplateArgs) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation, ParmVarDecl *Param,
@@ -763,12 +753,11 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
- concepts::Requirement *Req, sema::TemplateDeductionInfo &DeductionInfo,
- SourceRange InstantiationRange)
+ concepts::Requirement *Req, SourceRange InstantiationRange)
: InstantiatingTemplate(
SemaRef, CodeSynthesisContext::RequirementInstantiation,
PointOfInstantiation, InstantiationRange, /*Entity=*/nullptr,
- /*Template=*/nullptr, /*TemplateArgs=*/{}, &DeductionInfo) {}
+ /*Template=*/nullptr, /*TemplateArgs=*/{}) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
@@ -781,11 +770,11 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation, const RequiresExpr *RE,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ SourceRange InstantiationRange)
: InstantiatingTemplate(
SemaRef, CodeSynthesisContext::RequirementParameterInstantiation,
PointOfInstantiation, InstantiationRange, /*Entity=*/nullptr,
- /*Template=*/nullptr, /*TemplateArgs=*/{}, &DeductionInfo) {}
+ /*Template=*/nullptr, /*TemplateArgs=*/{}) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
@@ -797,13 +786,11 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
TemplateArgs) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
- Sema &SemaRef, SourceLocation PointOfInstantiation,
- ConstraintSubstitution, NamedDecl *Template,
- sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
+ Sema &SemaRef, SourceLocation PointOfInstantiation, ConstraintSubstitution,
+ NamedDecl *Template, SourceRange InstantiationRange)
: InstantiatingTemplate(
SemaRef, CodeSynthesisContext::ConstraintSubstitution,
- PointOfInstantiation, InstantiationRange, Template, nullptr,
- {}, &DeductionInfo) {}
+ PointOfInstantiation, InstantiationRange, Template, nullptr, {}) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
@@ -835,9 +822,6 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
ArgLoc, InstantiationRange, PArg) {}
bool Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) {
- Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext;
- InNonInstantiationSFINAEContext = false;
-
if (!Ctx.isInstantiationRecord()) {
++NonInstantiationEntries;
} else {
@@ -871,8 +855,6 @@ void Sema::popCodeSynthesisContext() {
--NonInstantiationEntries;
}
- InNonInstantiationSFINAEContext = Active.SavedInNonInstantiationSFINAEContext;
-
// Name lookup no longer looks in this template's defining module.
assert(CodeSynthesisContexts.size() >=
CodeSynthesisContextLookupModules.size() &&
@@ -1282,93 +1264,6 @@ void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) {
}
}
-std::optional<TemplateDeductionInfo *> Sema::isSFINAEContext() const {
- if (InNonInstantiationSFINAEContext)
- return std::optional<TemplateDeductionInfo *>(nullptr);
-
- for (SmallVectorImpl<CodeSynthesisContext>::const_reverse_iterator
- Active = CodeSynthesisContexts.rbegin(),
- ActiveEnd = CodeSynthesisContexts.rend();
- Active != ActiveEnd;
- ++Active)
- {
- switch (Active->Kind) {
- case CodeSynthesisContext::TypeAliasTemplateInstantiation:
- // An instantiation of an alias template may or may not be a SFINAE
- // context, depending on what else is on the stack.
- if (isa<TypeAliasTemplateDecl>(Active->Entity))
- break;
- [[fallthrough]];
- case CodeSynthesisContext::TemplateInstantiation:
- case CodeSynthesisContext::DefaultFunctionArgumentInstantiation:
- case CodeSynthesisContext::ExceptionSpecInstantiation:
- case CodeSynthesisContext::ConstraintsCheck:
- case CodeSynthesisContext::ParameterMappingSubstitution:
- case CodeSynthesisContext::ConstraintNormalization:
- case CodeSynthesisContext::NestedRequirementConstraintsCheck:
- // This is a template instantiation, so there is no SFINAE.
- return std::nullopt;
- case CodeSynthesisContext::LambdaExpressionSubstitution:
- // [temp.deduct]p9
- // A lambda-expression appearing in a function type or a template
- // parameter is not considered part of the immediate context for the
- // purposes of template argument deduction.
- // CWG2672: A lambda-expression body is never in the immediate context.
- return std::nullopt;
-
- case CodeSynthesisContext::DefaultTemplateArgumentInstantiation:
- case CodeSynthesisContext::PriorTemplateArgumentSubstitution:
- case CodeSynthesisContext::DefaultTemplateArgumentChecking:
- case CodeSynthesisContext::RewritingOperatorAsSpaceship:
- case CodeSynthesisContext::PartialOrderingTTP:
- // A default template argument instantiation and substitution into
- // template parameters with arguments for prior parameters may or may
- // not be a SFINAE context; look further up the stack.
- break;
-
- case CodeSynthesisContext::ExplicitTemplateArgumentSubstitution:
- case CodeSynthesisContext::DeducedTemplateArgumentSubstitution:
- // We're either substituting explicitly-specified template arguments,
- // deduced template arguments. SFINAE applies unless we are in a lambda
- // body, see [temp.deduct]p9.
- case CodeSynthesisContext::ConstraintSubstitution:
- case CodeSynthesisContext::RequirementInstantiation:
- case CodeSynthesisContext::RequirementParameterInstantiation:
- // SFINAE always applies in a constraint expression or a requirement
- // in a requires expression.
- assert(Active->DeductionInfo && "Missing deduction info pointer");
- return Active->DeductionInfo;
-
- case CodeSynthesisContext::DeclaringSpecialMember:
- case CodeSynthesisContext::DeclaringImplicitEqualityComparison:
- case CodeSynthesisContext::DefiningSynthesizedFunction:
- case CodeSynthesisContext::InitializingStructuredBinding:
- case CodeSynthesisContext::MarkingClassDllexported:
- case CodeSynthesisContext::BuildingBuiltinDumpStructCall:
- case CodeSynthesisContext::BuildingDeductionGuides:
- // This happens in a context unrelated to template instantiation, so
- // there is no SFINAE.
- return std::nullopt;
-
- case CodeSynthesisContext::ExceptionSpecEvaluation:
- // FIXME: This should not be treated as a SFINAE context, because
- // we will cache an incorrect exception specification. However, clang
- // bootstrap relies this! See PR31692.
- break;
-
- case CodeSynthesisContext::Memoization:
- break;
- }
-
- // The inner context was transparent for SFINAE. If it occurred within a
- // non-instantiation SFINAE context, then SFINAE applies.
- if (Active->SavedInNonInstantiationSFINAEContext)
- return std::optional<TemplateDeductionInfo *>(nullptr);
- }
-
- return std::nullopt;
-}
-
//===----------------------------------------------------------------------===/
// Template Instantiation for Types
//===----------------------------------------------------------------------===/
@@ -2674,10 +2569,9 @@ ExprResult TemplateInstantiator::TransformRequiresTypeParams(
Sema::ExtParameterInfoBuilder &PInfos) {
TemplateDeductionInfo Info(KWLoc);
- Sema::InstantiatingTemplate TypeInst(SemaRef, KWLoc,
- RE, Info,
+ Sema::InstantiatingTemplate TypeInst(SemaRef, KWLoc, RE,
SourceRange{KWLoc, RBraceLoc});
- Sema::SFINAETrap Trap(SemaRef);
+ Sema::SFINAETrap Trap(SemaRef, Info);
unsigned ErrorIdx;
if (getDerived().TransformFunctionTypeParams(
@@ -2709,10 +2603,10 @@ TemplateInstantiator::TransformTypeRequirement(concepts::TypeRequirement *Req) {
return Req;
}
- Sema::SFINAETrap Trap(SemaRef);
TemplateDeductionInfo Info(Req->getType()->getTypeLoc().getBeginLoc());
- Sema::InstantiatingTemplate TypeInst(SemaRef,
- Req->getType()->getTypeLoc().getBeginLoc(), Req, Info,
+ Sema::SFINAETrap Trap(SemaRef, Info);
+ Sema::InstantiatingTemplate TypeInst(
+ SemaRef, Req->getType()->getTypeLoc().getBeginLoc(), Req,
Req->getType()->getTypeLoc().getSourceRange());
if (TypeInst.isInvalid())
return nullptr;
@@ -2730,8 +2624,6 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
if (!Req->isDependent() && !AlwaysRebuild())
return Req;
- Sema::SFINAETrap Trap(SemaRef);
-
llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *>
TransExpr;
if (Req->isExprSubstitutionFailure())
@@ -2739,7 +2631,8 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
else {
Expr *E = Req->getExpr();
TemplateDeductionInfo Info(E->getBeginLoc());
- Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info,
+ Sema::SFINAETrap Trap(SemaRef, Info);
+ Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req,
E->getSourceRange());
if (ExprInst.isInvalid())
return nullptr;
@@ -2765,8 +2658,9 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
TemplateParameterList *OrigTPL =
RetReq.getTypeConstraintTemplateParameterList();
TemplateDeductionInfo Info(OrigTPL->getTemplateLoc());
- Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(),
- Req, Info, OrigTPL->getSourceRange());
+ Sema::SFINAETrap Trap(SemaRef, Info);
+ Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(), Req,
+ OrigTPL->getSourceRange());
if (TPLInst.isInvalid())
return nullptr;
TemplateParameterList *TPL = TransformTemplateParameterList(OrigTPL);
@@ -2830,11 +2724,9 @@ TemplateInstantiator::TransformNestedRequirement(
bool Success;
Expr *NewConstraint;
- TemplateDeductionInfo Info(Constraint->getBeginLoc());
{
EnterExpressionEvaluationContext ContextRAII(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
-
Sema::InstantiatingTemplate ConstrInst(
SemaRef, Constraint->getBeginLoc(), Req,
Sema::InstantiatingTemplate::ConstraintsCheck(),
@@ -2843,16 +2735,10 @@ TemplateInstantiator::TransformNestedRequirement(
if (ConstrInst.isInvalid())
return nullptr;
- Sema::SFINAETrap Trap(SemaRef);
-
Success = !SemaRef.CheckConstraintSatisfaction(
Req, AssociatedConstraint(Constraint, SemaRef.ArgPackSubstIndex),
TemplateArgs, Constraint->getSourceRange(), Satisfaction,
/*TopLevelConceptId=*/nullptr, &NewConstraint);
-
- assert((!Success || !Trap.hasErrorOccurred()) &&
- "Substitution failures must be handled "
- "by CheckConstraintSatisfaction.");
}
if (!Success || Satisfaction.HasSubstitutionFailure())
@@ -3306,7 +3192,7 @@ bool Sema::SubstDefaultArgument(
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Param);
-
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, Loc, Param, TemplateArgs.getInnermost());
if (Inst.isInvalid())
return true;
@@ -3594,6 +3480,7 @@ bool Sema::InstantiateClassImpl(
Spec->setPointOfInstantiation(PointOfInstantiation);
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
@@ -3828,6 +3715,7 @@ bool Sema::InstantiateEnum(SourceLocation PointOfInstantiation,
MSInfo->setPointOfInstantiation(PointOfInstantiation);
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
@@ -3892,6 +3780,7 @@ bool Sema::InstantiateInClassInitializer(
return true;
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
@@ -3975,6 +3864,7 @@ static ActionResult<CXXRecordDecl *> getPatternForClassTemplateSpecialization(
Sema &S, SourceLocation PointOfInstantiation,
ClassTemplateSpecializationDecl *ClassTemplateSpec,
TemplateSpecializationKind TSK, bool PrimaryStrictPackMatch) {
+ std::optional<Sema::NonSFINAEContext> NSC(S);
Sema::InstantiatingTemplate Inst(S, PointOfInstantiation, ClassTemplateSpec);
if (Inst.isInvalid())
return {/*Invalid=*/true};
@@ -4076,6 +3966,7 @@ static ActionResult<CXXRecordDecl *> getPatternForClassTemplateSpecialization(
if (Ambiguous) {
// Partial ordering did not produce a clear winner. Complain.
Inst.Clear();
+ NSC.reset();
S.Diag(PointOfInstantiation,
diag::err_partial_spec_ordering_ambiguous)
<< ClassTemplateSpec;
@@ -4507,6 +4398,7 @@ ExprResult Sema::SubstConceptTemplateArguments(
TemplateArgumentListInfo SubstArgs(ArgsAsWritten->getLAngleLoc(),
ArgsAsWritten->getRAngleLoc());
+ NonSFINAEContext _(*this);
Sema::InstantiatingTemplate Inst(
*this, ArgsAsWritten->arguments().front().getSourceRange().getBegin(),
Sema::InstantiatingTemplate::ConstraintNormalization{},
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 681bfe0d8cbf..4d58f0016829 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -5316,6 +5316,7 @@ void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation,
return;
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Decl,
InstantiatingTemplate::ExceptionSpecification());
if (Inst.isInvalid()) {
@@ -5383,6 +5384,7 @@ TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New,
if (ActiveInst.Kind == ActiveInstType::ExplicitTemplateArgumentSubstitution ||
ActiveInst.Kind == ActiveInstType::DeducedTemplateArgumentSubstitution) {
if (isa<FunctionTemplateDecl>(ActiveInst.Entity)) {
+ SemaRef.CurrentSFINAEContext = nullptr;
atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
ActiveInst.Kind = ActiveInstType::TemplateInstantiation;
ActiveInst.Entity = New;
@@ -5493,8 +5495,7 @@ FunctionDecl *Sema::InstantiateFunctionDeclaration(
SourceLocation Loc, CodeSynthesisContext::SynthesisKind CSC) {
FunctionDecl *FD = FTD->getTemplatedDecl();
- sema::TemplateDeductionInfo Info(Loc);
- InstantiatingTemplate Inst(*this, Loc, FTD, Args->asArray(), CSC, Info);
+ InstantiatingTemplate Inst(*this, Loc, FTD, Args->asArray(), CSC);
if (Inst.isInvalid())
return nullptr;
@@ -5684,6 +5685,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
}
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Function);
if (Inst.isInvalid())
return;
@@ -5974,6 +5976,7 @@ VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation(
if (FromVar->isInvalidDecl())
return nullptr;
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, FromVar);
if (Inst.isInvalid())
return nullptr;
@@ -6281,6 +6284,7 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
!Var->hasInit()) {
// FIXME: Factor out the duplicated instantiation context setup/tear down
// code here.
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid())
return;
@@ -6385,6 +6389,7 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
return;
}
+ NonSFINAEContext _(*this);
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid())
return;
diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp
index 0f72d6a13ae0..5b1aad3fa847 100644
--- a/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -844,7 +844,7 @@ bool Sema::CheckParameterPacksForExpansion(
ArrayRef<UnexpandedParameterPack> Unexpanded,
const MultiLevelTemplateArgumentList &TemplateArgs,
bool FailOnPackProducingTemplates, bool &ShouldExpand,
- bool &RetainExpansion, UnsignedOrNone &NumExpansions) {
+ bool &RetainExpansion, UnsignedOrNone &NumExpansions, bool Diagnose) {
ShouldExpand = true;
RetainExpansion = false;
IdentifierLoc FirstPack;
@@ -874,6 +874,9 @@ bool Sema::CheckParameterPacksForExpansion(
if (!FailOnPackProducingTemplates)
continue;
+ if (!Diagnose)
+ return true;
+
// It is not yet supported in certain contexts.
return Diag(PatternRange.getBegin().isValid() ? PatternRange.getBegin()
: EllipsisLoc,
@@ -1015,7 +1018,9 @@ bool Sema::CheckParameterPacksForExpansion(
// C++0x [temp.variadic]p5:
// All of the parameter packs expanded by a pack expansion shall have
// the same number of arguments specified.
- if (HaveFirstPack)
+ if (!Diagnose)
+ ;
+ else if (HaveFirstPack)
Diag(EllipsisLoc, diag::err_pack_expansion_length_conflict)
<< FirstPack.getIdentifierInfo() << Name << *NumExpansions
<< (LeastNewPackSize != NewPackSize) << LeastNewPackSize
@@ -1041,6 +1046,8 @@ bool Sema::CheckParameterPacksForExpansion(
if (NumExpansions && *NumExpansions < *NumPartialExpansions) {
NamedDecl *PartialPack =
CurrentInstantiationScope->getPartiallySubstitutedPack();
+ if (!Diagnose)
+ return true;
Diag(EllipsisLoc, diag::err_pack_expansion_length_conflict_partial)
<< PartialPack << *NumPartialExpansions << *NumExpansions
<< SourceRange(PartiallySubstitutedPackLoc);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index dffd7c1def8e..94105f10d71b 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -8076,14 +8076,13 @@ TreeTransform<Derived>::TransformCompoundStmt(CompoundStmt *S,
getSema().resetFPOptions(
S->getStoredFPFeatures().applyOverrides(getSema().getLangOpts()));
- const Stmt *ExprResult = S->getStmtExprResult();
bool SubStmtInvalid = false;
bool SubStmtChanged = false;
SmallVector<Stmt*, 8> Statements;
for (auto *B : S->body()) {
StmtResult Result = getDerived().TransformStmt(
- B, IsStmtExpr && B == ExprResult ? StmtDiscardKind::StmtExprResult
- : StmtDiscardKind::Discarded);
+ B, IsStmtExpr && B == S->body_back() ? StmtDiscardKind::StmtExprResult
+ : StmtDiscardKind::Discarded);
if (Result.isInvalid()) {
// Immediately fail if this was a DeclStmt, since it's very
@@ -15824,16 +15823,20 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
Sema::ExpressionEvaluationContext::PotentiallyEvaluated,
E->getCallOperator());
- Sema::CodeSynthesisContext C;
- C.Kind = clang::Sema::CodeSynthesisContext::LambdaExpressionSubstitution;
- C.PointOfInstantiation = E->getBody()->getBeginLoc();
- getSema().pushCodeSynthesisContext(C);
+ StmtResult Body;
+ {
+ Sema::NonSFINAEContext _(getSema());
+ Sema::CodeSynthesisContext C;
+ C.Kind = clang::Sema::CodeSynthesisContext::LambdaExpressionSubstitution;
+ C.PointOfInstantiation = E->getBody()->getBeginLoc();
+ getSema().pushCodeSynthesisContext(C);
- // Instantiate the body of the lambda expression.
- StmtResult Body =
- Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
+ // Instantiate the body of the lambda expression.
+ Body = Invalid ? StmtError()
+ : getDerived().TransformLambdaBody(E, E->getBody());
- getSema().popCodeSynthesisContext();
+ getSema().popCodeSynthesisContext();
+ }
// ActOnLambda* will pop the function scope for us.
FuncScopeCleanup.disable();
diff --git a/clang/test/AST/ast-dump-stmt.c b/clang/test/AST/ast-dump-stmt.c
index 5c44fea2df6e..6fb01a4b159f 100644
--- a/clang/test/AST/ast-dump-stmt.c
+++ b/clang/test/AST/ast-dump-stmt.c
@@ -400,7 +400,7 @@ void TestMiscStmts(void) {
// CHECK-NEXT: ImplicitCastExpr
// CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:17> 'int' lvalue Var 0x{{[^ ]*}} 'a' 'int'
({int a = 10; a;;; });
- // CHECK-NEXT: StmtExpr 0x{{[^ ]*}} <line:[[@LINE-1]]:3, col:23> 'int'
+ // CHECK-NEXT: StmtExpr 0x{{[^ ]*}} <line:[[@LINE-1]]:3, col:23> 'void'
// CHECK-NEXT: CompoundStmt
// CHECK-NEXT: DeclStmt
// CHECK-NEXT: VarDecl 0x{{[^ ]*}} <col:5, col:13> col:9 used a 'int' cinit
diff --git a/clang/test/C/C2y/n3525.c b/clang/test/C/C2y/n3525.c
new file mode 100644
index 000000000000..428df23c79ba
--- /dev/null
+++ b/clang/test/C/C2y/n3525.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
+// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s
+
+/* WG14 N3525: Yes
+ * static_assert without UB
+ *
+ * Ensures that a static_assert declaration cannot defer to runtime; it must
+ * take an integer constant expression that is resolved at compile time.
+ *
+ * Note: implementations are free to extend what is a valid integer constant
+ * expression, and Clang (and GCC) does so. So this test is validating that
+ * we quietly accept a pasing assertion, loudly reject a failing assertion, and
+ * issue a pedantic diagnostic for the extension case.
+ */
+
+static_assert(1); // Okay
+
+static_assert(0); // expected-error {{static assertion failed}}
+
+extern int a;
+static_assert(1 || a); // expected-warning {{expression is not an integer constant expression; folding it to a constant is a GNU extension}}
+
+static_assert(a); // expected-error {{static assertion expression is not an integral constant expression}}
+static_assert(0 || a); // expected-error {{static assertion expression is not an integral constant expression}}
+
+// Note, there is no CodeGen test for this; we have existing tests for the ICE
+// extension, so the pedantic warning is sufficient to verify we're not
+// emitting code which reads 'a' in '1 || a' because of the folding, and
+// there's no way to generate code for reading 'a' in '0 || a' because of the
+// error.
diff --git a/clang/test/CIR/CodeGen/statement-exprs.c b/clang/test/CIR/CodeGen/statement-exprs.c
index c784ec9eda7d..2d05fc13ff6e 100644
--- a/clang/test/CIR/CodeGen/statement-exprs.c
+++ b/clang/test/CIR/CodeGen/statement-exprs.c
@@ -6,7 +6,7 @@
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
int f19(void) {
- return ({ 3;;4;; });
+ return ({ 3;;4; });
}
// CIR: cir.func dso_local @f19() -> !s32i
@@ -42,6 +42,16 @@ int f19(void) {
// OGCG: %[[TMP_VAL:.+]] = load i32, ptr %[[TMP]]
// OGCG: ret i32 %[[TMP_VAL]]
+// PR166036: The trailing NullStmt should result in a void.
+void f20(void) {
+ return ({ 3;;4;; });
+}
+
+// CIR-LABEL: cir.func dso_local @f20() {{[^-]*}}
+// CIR: cir.return {{[^%]*}}
+
+// LLVM-LABEL: define{{.*}} void @f20
+// LLVM: ret void
int nested(void) {
({123;});
diff --git a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c
index 670b65070289..929df94aa60e 100644
--- a/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-fcvt-intrinsics.c
@@ -26,16 +26,36 @@ int32_t test_vcvtas_s32_f32(float32_t a) {
return (int32_t)vcvtas_s32_f32(a);
}
-// CHECK-LABEL: define {{[^@]+}}@test_test_vcvtad_s64_f64
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_s64_f64
// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double [[A]])
// CHECK-NEXT: ret i64 [[VCVTAD_S64_F64_I]]
//
-int64_t test_test_vcvtad_s64_f64(float64_t a) {
+int64_t test_vcvtad_s64_f64(float64_t a) {
return (int64_t)vcvtad_s64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtas_s64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTAS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTAS_S64_F32_I]]
+//
+int64_t test_vcvtas_s64_f32(float32_t a) {
+ return (int64_t)vcvtas_s64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_s32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTAD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTAD_S32_F64_I]]
+//
+int32_t test_vcvtad_s32_f64(float64_t a) {
+ return (int32_t)vcvtad_s32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtas_u32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -56,6 +76,26 @@ uint64_t test_vcvtad_u64_f64(float64_t a) {
return (uint64_t)vcvtad_u64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtas_u64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTAS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTAS_U64_F32_I]]
+//
+uint64_t test_vcvtas_u64_f32(float32_t a) {
+ return (uint64_t)vcvtas_u64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtad_u32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTAD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTAD_U32_F64_I]]
+//
+uint32_t test_vcvtad_u32_f64(float64_t a) {
+ return (uint32_t)vcvtad_u32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_s32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -76,6 +116,26 @@ int64_t test_vcvtmd_s64_f64(float64_t a) {
return (int64_t)vcvtmd_s64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_s64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTMS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTMS_S64_F32_I]]
+//
+int64_t test_vcvtms_s64_f32(float32_t a) {
+ return (int64_t)vcvtms_s64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_s32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTMD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTMD_S32_F64_I]]
+//
+int32_t test_vcvtmd_s32_f64(float64_t a) {
+ return (int32_t)vcvtmd_s32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_u32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -96,6 +156,26 @@ uint64_t test_vcvtmd_u64_f64(float64_t a) {
return (uint64_t)vcvtmd_u64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtms_u64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTMS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTMS_U64_F32_I]]
+//
+uint64_t test_vcvtms_u64_f32(float32_t a) {
+ return (uint64_t)vcvtms_u64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtmd_u32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTMD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTMD_U32_F64_I]]
+//
+uint32_t test_vcvtmd_u32_f64(float64_t a) {
+ return (uint32_t)vcvtmd_u32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_s32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -116,6 +196,26 @@ int64_t test_vcvtnd_s64_f64(float64_t a) {
return (int64_t)vcvtnd_s64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_s64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTNS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTNS_S64_F32_I]]
+//
+int64_t test_vcvtns_s64_f32(float32_t a) {
+ return (int64_t)vcvtns_s64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_s32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTND_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTND_S32_F64_I]]
+//
+int32_t test_vcvtnd_s32_f64(float64_t a) {
+ return (int32_t)vcvtnd_s32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_u32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -136,6 +236,26 @@ uint64_t test_vcvtnd_u64_f64(float64_t a) {
return (uint64_t)vcvtnd_u64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtns_u64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTNS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTNS_U64_F32_I]]
+//
+uint64_t test_vcvtns_u64_f32(float32_t a) {
+ return (uint64_t)vcvtns_u64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtnd_u32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTND_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTND_U32_F64_I]]
+//
+uint32_t test_vcvtnd_u32_f64(float64_t a) {
+ return (uint32_t)vcvtnd_u32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_s32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -156,6 +276,26 @@ int64_t test_vcvtpd_s64_f64(float64_t a) {
return (int64_t)vcvtpd_s64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_s64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTPS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTPS_S64_F32_I]]
+//
+int64_t test_vcvtps_s64_f32(float32_t a) {
+ return (int64_t)vcvtps_s64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_s32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTPD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTPD_S32_F64_I]]
+//
+int32_t test_vcvtpd_s32_f64(float64_t a) {
+ return (int32_t)vcvtpd_s32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_u32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -176,6 +316,26 @@ uint64_t test_vcvtpd_u64_f64(float64_t a) {
return (uint64_t)vcvtpd_u64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtps_u64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTPS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTPS_U64_F32_I]]
+//
+uint64_t test_vcvtps_u64_f32(float32_t a) {
+ return (uint64_t)vcvtps_u64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtpd_u32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTPD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTPD_U32_F64_I]]
+//
+uint32_t test_vcvtpd_u32_f64(float64_t a) {
+ return (uint32_t)vcvtpd_u32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvts_s32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -196,6 +356,26 @@ int64_t test_vcvtd_s64_f64(float64_t a) {
return (int64_t)vcvtd_s64_f64(a);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vcvts_s64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTS_S64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTS_S64_F32_I]]
+//
+int64_t test_vcvts_s64_f32(float32_t a) {
+ return (int64_t)vcvts_s64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtd_s32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTD_S32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTD_S32_F64_I]]
+//
+int32_t test_vcvtd_s32_f64(float64_t a) {
+ return (int32_t)vcvtd_s32_f64(a);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vcvts_u32_f32
// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
@@ -215,3 +395,24 @@ uint32_t test_vcvts_u32_f32(float32_t a) {
uint64_t test_vcvtd_u64_f64(float64_t a) {
return (uint64_t)vcvtd_u64_f64(a);
}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvts_u64_f32
+// CHECK-SAME: (float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTS_U64_F32_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float [[A]])
+// CHECK-NEXT: ret i64 [[VCVTS_U64_F32_I]]
+//
+uint64_t test_vcvts_u64_f32(float32_t a) {
+ return (uint64_t)vcvts_u64_f32(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtd_u32_f64
+// CHECK-SAME: (double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VCVTD_U32_F64_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double [[A]])
+// CHECK-NEXT: ret i32 [[VCVTD_U32_F64_I]]
+//
+uint32_t test_vcvtd_u32_f64(float64_t a) {
+ return (uint32_t)vcvtd_u32_f64(a);
+}
+
diff --git a/clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c b/clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c
new file mode 100644
index 000000000000..89ee9e38bb3f
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c
@@ -0,0 +1,23 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v9.6a -target-feature +f8f16mm -target-feature +fp8 \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vmmlaq_f16_mf8(
+// CHECK-SAME: <8 x half> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[P0]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[P3]])
+// CHECK-NEXT: [[FMMLA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[FMMLA1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmmla.v8f16.v16i8(<8 x half> [[FMMLA_I]], <16 x i8> [[P1]], <16 x i8> [[P2]])
+// CHECK-NEXT: ret <8 x half> [[FMMLA1_I]]
+//
+float16x8_t test_vmmlaq_f16_mf8(float16x8_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) {
+ return vmmlaq_f16_mf8_fpm(p0, p1, p2, p3);
+}
diff --git a/clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c b/clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c
new file mode 100644
index 000000000000..13db72c2cbdd
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c
@@ -0,0 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v9.6a -target-feature +f8f32mm -target-feature +fp8 \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <4 x float> @test_vmmlaq_f32_mf8(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[P3]])
+// CHECK-NEXT: [[FMMLA_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> [[P0]], <16 x i8> [[P1]], <16 x i8> [[P2]])
+// CHECK-NEXT: ret <4 x float> [[FMMLA_I]]
+//
+float32x4_t test_vmmlaq_f32_mf8(float32x4_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) {
+ return vmmlaq_f32_mf8_fpm(p0, p1, p2, p3);
+}
+
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index e6e2e38bcc09..834e140018c3 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -209,6 +209,10 @@ unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_kortestz_mask32_u8(0x0123'4567, 0xFEDC'BA98) == 0);
+
unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -222,6 +226,10 @@ unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0000'0000, 0x0000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask32_u8(0x0123'4567, 0xFEDC'BA98) == 1);
+
unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -242,6 +250,30 @@ unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask32_u8(unsigned int A, unsigned int B) {
+ unsigned char all_ones{};
+ return (_kortest_mask32_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask32_u8() {
+ constexpr unsigned int A1 = 0x0000'0000;
+ constexpr unsigned int B1 = 0x0000'0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask32_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000'0000;
+ constexpr unsigned int B2 = 0x8000'0000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask32_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0x0123'4567;
+ constexpr unsigned int B3 = 0xFEDC'BA98;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask32_u8(A3, B3) == expected_result_3);
+}
+#endif
+
unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -255,6 +287,10 @@ unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_kortestz_mask64_u8(0x0123'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 0);
+
unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -268,6 +304,10 @@ unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0023'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 0);
+TEST_CONSTEXPR(_kortestc_mask64_u8(0x0123'4567'89AB'CDEF, 0xFEDC'BA98'7654'3210) == 1);
+
unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -288,6 +328,30 @@ unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask64_u8(unsigned long long A, unsigned long long B) {
+ unsigned char all_ones{};
+ return (_kortest_mask64_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask64_u8() {
+ constexpr unsigned long long A1 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B1 = 0x0000'0000'0000'0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask64_u8(A1, B1) == expected_result_1);
+ constexpr unsigned long long A2 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B2 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask64_u8(A2, B2) == expected_result_2);
+ constexpr unsigned long long A3 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned long long B3 = 0xFEDC'BA98'7654'3210;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask64_u8(A3, B3) == expected_result_3);
+}
+#endif
+
unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -298,6 +362,11 @@ unsigned char test_ktestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0000'0000, 0x8000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0xF000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_ktestz_mask32_u8(0x0123'4567, 0x0123'4567) == 0);
+
unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -308,6 +377,11 @@ unsigned char test_ktestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu16_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0000'0000, 0x0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0000'0000, 0x8000'0000) == 0);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0xF000'0000, 0x8000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask32_u8(0x0123'4567, 0x0123'4567) == 1);
+
unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -322,6 +396,34 @@ unsigned char test_ktest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_ktest_mask32_u8(unsigned int A, unsigned int B) {
+ unsigned char and_not{};
+ return (_ktest_mask32_u8(A, B, &and_not) << 4) | and_not;
+}
+
+void _ktest_mask32_u8() {
+ constexpr unsigned int A1 = 0x0000'0000;
+ constexpr unsigned int B1 = 0x0000'0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask32_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000'0000;
+ constexpr unsigned int B2 = 0x8000'0000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask32_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0xF000'0000;
+ constexpr unsigned int B3 = 0x8000'0000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask32_u8(A3, B3) == expected_result_3);
+ constexpr unsigned int A4 = 0x0123'4567;
+ constexpr unsigned int B4 = 0x0123'4567;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask32_u8(A4, B4) == expected_result_4);
+}
+#endif
+
unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -332,6 +434,11 @@ unsigned char test_ktestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0xF000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_ktestz_mask64_u8(0x0123'4567'89AB'CDEF, 0x0123'4567'89AB'CDEF) == 0);
+
unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -342,6 +449,11 @@ unsigned char test_ktestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu8_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0000'0000'0000'0000, 0x0000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0000'0000'0000'0000, 0x8000'0000'0000'0000) == 0);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0xF000'0000'0000'0000, 0x8000'0000'0000'0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask64_u8(0x0123'4567'89AB'CDEF, 0x0123'4567'89AB'CDEF) == 1);
+
unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -356,6 +468,34 @@ unsigned char test_ktest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+constexpr unsigned char
+test_ktest_mask64_u8(unsigned long long A, unsigned long long B) {
+ unsigned char and_not{};
+ return (_ktest_mask64_u8(A, B, &and_not) << 4) | and_not;
+}
+
+void _ktest_mask64_u8() {
+ constexpr unsigned long long A1 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B1 = 0x0000'0000'0000'0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask64_u8(A1, B1) == expected_result_1);
+ constexpr unsigned long long A2 = 0x0000'0000'0000'0000;
+ constexpr unsigned long long B2 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask64_u8(A2, B2) == expected_result_2);
+ constexpr unsigned long long A3 = 0xF000'0000'0000'0000;
+ constexpr unsigned long long B3 = 0x8000'0000'0000'0000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask64_u8(A3, B3) == expected_result_3);
+ constexpr unsigned long long A4 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned long long B4 = 0x0123'4567'89AB'CDEF;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask64_u8(A4, B4) == expected_result_4);
+}
+#endif
+
__mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kadd_mask32
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 9c4ada3a2b7b..efe983ce5b10 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -117,6 +117,10 @@ unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_kortestz_mask8_u8(0x01, 0xFE) == 0);
+
unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -130,6 +134,10 @@ unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x00, 0x00) == 0);
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_kortestc_mask8_u8(0x01, 0xFE) == 1);
+
unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -150,6 +158,30 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask8_u8(unsigned char A, unsigned char B) {
+ unsigned char all_ones{};
+ return (_kortest_mask8_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask8_u8() {
+ constexpr unsigned char A1 = 0x00;
+ constexpr unsigned char B1 = 0x00;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask8_u8(A1, B1) == expected_result_1);
+ constexpr unsigned char A2 = 0x00;
+ constexpr unsigned char B2 = 0x80;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask8_u8(A2, B2) == expected_result_2);
+ constexpr unsigned char A3 = 0x01;
+ constexpr unsigned char B3 = 0xFE;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask8_u8(A3, B3) == expected_result_3);
+}
+#endif
+
unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -160,6 +192,11 @@ unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x00, 0x80) == 1);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0xF0, 0x80) == 0);
+TEST_CONSTEXPR(_ktestz_mask8_u8(0x01, 0x01) == 0);
+
unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -170,6 +207,11 @@ unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu64_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x00, 0x00) == 1);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x00, 0x80) == 0);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0xF0, 0x80) == 1);
+TEST_CONSTEXPR(_ktestc_mask8_u8(0x01, 0x01) == 1);
+
unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -184,6 +226,34 @@ unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_ktest_mask8_u8(unsigned char A, unsigned char B) {
+ unsigned char all_ones{};
+ return (_ktest_mask8_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _ktest_mask8_u8() {
+ constexpr unsigned char A1 = 0x00;
+ constexpr unsigned char B1 = 0x00;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask8_u8(A1, B1) == expected_result_1);
+ constexpr unsigned char A2 = 0x00;
+ constexpr unsigned char B2 = 0x80;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask8_u8(A2, B2) == expected_result_2);
+ constexpr unsigned char A3 = 0xF0;
+ constexpr unsigned char B3 = 0x80;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask8_u8(A3, B3) == expected_result_3);
+ constexpr unsigned char A4 = 0x01;
+ constexpr unsigned char B4 = 0x01;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask8_u8(A4, B4) == expected_result_4);
+}
+#endif
+
unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -194,6 +264,11 @@ unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0000, 0x8000) == 1);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0xF000, 0x8000) == 0);
+TEST_CONSTEXPR(_ktestz_mask16_u8(0x0123, 0x0123) == 0);
+
unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_ktestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -204,6 +279,11 @@ unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0xF000, 0x8000) == 1);
+TEST_CONSTEXPR(_ktestc_mask16_u8(0x0123, 0x0123) == 1);
+
unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_ktest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -218,6 +298,34 @@ unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_ktest_mask16_u8(unsigned int A, unsigned int B) {
+ unsigned char all_ones{};
+ return (_ktest_mask16_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _ktest_mask16_u8() {
+ constexpr unsigned int A1 = 0x0000;
+ constexpr unsigned int B1 = 0x0000;
+ constexpr unsigned char expected_result_1 = 0x11;
+ static_assert(test_ktest_mask16_u8(A1, B1) == expected_result_1);
+ constexpr unsigned int A2 = 0x0000;
+ constexpr unsigned int B2 = 0x8000;
+ constexpr unsigned char expected_result_2 = 0x10;
+ static_assert(test_ktest_mask16_u8(A2, B2) == expected_result_2);
+ constexpr unsigned int A3 = 0xF000;
+ constexpr unsigned int B3 = 0x8000;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_ktest_mask16_u8(A3, B3) == expected_result_3);
+ constexpr unsigned int A4 = 0x0123;
+ constexpr unsigned int B4 = 0x0123;
+ constexpr unsigned char expected_result_4 = 0x01;
+ static_assert(test_ktest_mask16_u8(A4, B4) == expected_result_4);
+}
+#endif
+
__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kadd_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 8e65430bd3e8..17778b52d367 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9015,6 +9015,10 @@ int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_mm512_kortestc(0x0000, 0x0000) == 0);
+TEST_CONSTEXPR(_mm512_kortestc(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_mm512_kortestc(0x0123, 0xFEDC) == 1);
+
int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_mm512_kortestz
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9027,6 +9031,10 @@ int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_mm512_kortestz(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_mm512_kortestz(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_mm512_kortestz(0x0123, 0xFEDC) == 0);
+
unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9040,6 +9048,10 @@ unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0000, 0x0000) == 1);
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_kortestz_mask16_u8(0x0123, 0xFEDC) == 0);
+
unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: test_kortestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9053,6 +9065,10 @@ unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m
_mm512_cmpneq_epu32_mask(__C, __D));
}
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0000, 0x0000) == 0);
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0000, 0x8000) == 0);
+TEST_CONSTEXPR(_kortestc_mask16_u8(0x0123, 0xFEDC) == 1);
+
unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: test_kortest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
@@ -9073,6 +9089,30 @@ unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m5
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr unsigned char
+test_kortest_mask16_u8(unsigned short A, unsigned short B) {
+ unsigned char all_ones{};
+ return (_kortest_mask16_u8(A, B, &all_ones) << 4) | all_ones;
+}
+
+void _kortest_mask16_u8() {
+ constexpr unsigned short A1 = 0x0000;
+ constexpr unsigned short B1 = 0x0000;
+ constexpr unsigned char expected_result_1 = 0x10;
+ static_assert(test_kortest_mask16_u8(A1, B1) == expected_result_1);
+ constexpr unsigned short A2 = 0x0000;
+ constexpr unsigned short B2 = 0x8000;
+ constexpr unsigned char expected_result_2 = 0x00;
+ static_assert(test_kortest_mask16_u8(A2, B2) == expected_result_2);
+ constexpr unsigned short A3 = 0x0123;
+ constexpr unsigned short B3 = 0xFEDC;
+ constexpr unsigned char expected_result_3 = 0x01;
+ static_assert(test_kortest_mask16_u8(A3, B3) == expected_result_3);
+}
+#endif
+
__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackb
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
diff --git a/clang/test/CodeGen/attr-counted-by-void-ptr-gnu.c b/clang/test/CodeGen/attr-counted-by-void-ptr-gnu.c
new file mode 100644
index 000000000000..e22aad306f60
--- /dev/null
+++ b/clang/test/CodeGen/attr-counted-by-void-ptr-gnu.c
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -std=gnu11 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm -o - %s | FileCheck %s
+
+// Test that counted_by on void* in GNU mode treats void as having size 1 (byte count)
+
+#define __counted_by(f) __attribute__((counted_by(f)))
+#define __sized_by(f) __attribute__((sized_by(f)))
+
+struct with_counted_by_void {
+ int count;
+ void* buf __counted_by(count);
+};
+
+struct with_sized_by_void {
+ int size;
+ void* buf __sized_by(size);
+};
+
+struct with_counted_by_int {
+ int count;
+ int* buf __counted_by(count);
+};
+
+// CHECK-LABEL: define dso_local {{.*}}@test_counted_by_void(
+// CHECK: %[[COUNT:.*]] = load i32, ptr %s
+// CHECK: %[[NARROW:.*]] = tail call i32 @llvm.smax.i32(i32 %[[COUNT]], i32 0)
+// CHECK: %[[ZEXT:.*]] = zext nneg i32 %[[NARROW]] to i64
+// CHECK: ret i64 %[[ZEXT]]
+//
+// Verify: counted_by on void* returns the count directly (count * 1 byte)
+long long test_counted_by_void(struct with_counted_by_void *s) {
+ return __builtin_dynamic_object_size(s->buf, 0);
+}
+
+// CHECK-LABEL: define dso_local {{.*}}@test_sized_by_void(
+// CHECK: %[[SIZE:.*]] = load i32, ptr %s
+// CHECK: %[[NARROW:.*]] = tail call i32 @llvm.smax.i32(i32 %[[SIZE]], i32 0)
+// CHECK: %[[ZEXT:.*]] = zext nneg i32 %[[NARROW]] to i64
+// CHECK: ret i64 %[[ZEXT]]
+//
+// Verify: sized_by on void* returns the size directly
+long long test_sized_by_void(struct with_sized_by_void *s) {
+ return __builtin_dynamic_object_size(s->buf, 0);
+}
+
+// CHECK-LABEL: define dso_local {{.*}}@test_counted_by_int(
+// CHECK: %[[COUNT:.*]] = load i32, ptr %s
+// CHECK: %[[SEXT:.*]] = sext i32 %[[COUNT]] to i64
+// CHECK: %[[SIZE:.*]] = shl nsw i64 %[[SEXT]], 2
+// CHECK: ret i64
+//
+// Verify: counted_by on int* returns count * sizeof(int) = count * 4
+long long test_counted_by_int(struct with_counted_by_int *s) {
+ return __builtin_dynamic_object_size(s->buf, 0);
+}
+
+// CHECK-LABEL: define dso_local ptr @test_void_ptr_arithmetic(
+// CHECK: %[[BUF:.*]] = load ptr, ptr
+// CHECK: %[[EXT:.*]] = sext i32 %offset to i64
+// CHECK: %[[PTR:.*]] = getelementptr inbounds i8, ptr %[[BUF]], i64 %[[EXT]]
+// CHECK: ret ptr %[[PTR]]
+//
+// Verify: pointer arithmetic on void* uses i8 (byte offsets), not i32 or other sizes
+void* test_void_ptr_arithmetic(struct with_counted_by_void *s, int offset) {
+ return s->buf + offset; // GNU extension: void* arithmetic
+}
diff --git a/clang/test/CodeGen/exprs.c b/clang/test/CodeGen/exprs.c
index 5cca9722dcb3..93015da074bf 100644
--- a/clang/test/CodeGen/exprs.c
+++ b/clang/test/CodeGen/exprs.c
@@ -196,10 +196,17 @@ void f18(void) {
// Ensure the right stmt is returned
int f19(void) {
- return ({ 3;;4;; });
+ return ({ 3;;4; });
}
// CHECK-LABEL: define{{.*}} i32 @f19()
// CHECK: [[T:%.*]] = alloca i32
// CHECK: store i32 4, ptr [[T]]
// CHECK: [[L:%.*]] = load i32, ptr [[T]]
// CHECK: ret i32 [[L]]
+
+// PR166036: The trailing NullStmt should result in a void.
+void f20(void) {
+ return ({ 3;;4;; });
+}
+// CHECK-LABEL: define{{.*}} void @f20()
+// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 368d652a6f77..51b0f81bea06 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,161 +1,260 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -O1 -o - | FileCheck %s -DTARGET=dx \
+// RUN: --check-prefixes=CHECK,DXCHECK
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s -DTARGET=spv
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -O1 -o - | FileCheck %s -DTARGET=spv
#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_ushort
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbituhigh.i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 15, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_ushort(uint16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbitshigh.i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 15, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_short(int16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_short2(int16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_short3(int16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 15), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_short4(int16_t4 p0) {
return firstbithigh(p0);
}
#endif // __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_uint
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbituhigh.i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 31, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_uint(uint p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_uint2(uint2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_uint3(uint3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_uint4(uint4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbituhigh.i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 63, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_ulong(uint64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbitshigh.i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 31, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_int(int p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_int2(int2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_int3(int3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_int4(int4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}i32 @llvm.[[TARGET]].firstbitshigh.i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub i32 63, [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[FBH]], -1
+// DXCHECK-NEXT: select i1 %cmp.i, i32 -1, i32 [[SUB]]
+// CHECK-NEXT: ret i32
uint test_firstbithigh_long(int64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <2 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <2 x i1> %cmp.i, <2 x i32> splat (i32 -1), <2 x i32> [[SUB]]
+// CHECK-NEXT: ret <2 x i32>
uint2 test_firstbithigh_long2(int64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <3 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <3 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <3 x i1> %cmp.i, <3 x i32> splat (i32 -1), <3 x i32> [[SUB]]
+// CHECK-NEXT: ret <3 x i32>
uint3 test_firstbithigh_long3(int64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 63), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: ret <4 x i32>
uint4 test_firstbithigh_long4(int64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_upcast
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
-// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64>
-// CHECK: ret <4 x i64> [[CONV]]
+// CHECK: [[FBH:%.*]] = tail call {{.*}}<4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
+// DXCHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// DXCHECK-NEXT: [[ICMP:%.*]] = icmp eq <4 x i32> [[FBH]], splat (i32 -1)
+// DXCHECK-NEXT: select <4 x i1> %cmp.i, <4 x i32> splat (i32 -1), <4 x i32> [[SUB]]
+// CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> {{.*}} to <4 x i64>
+// CHECK-NEXT: ret <4 x i64> [[ZEXT]]
uint64_t4 test_firstbithigh_upcast(uint4 p0) {
return firstbithigh(p0);
}
diff --git a/clang/test/Parser/lambda-misplaced-capture-default.cpp b/clang/test/Parser/lambda-misplaced-capture-default.cpp
index d65b875102da..4f5bd6d7fa5e 100644
--- a/clang/test/Parser/lambda-misplaced-capture-default.cpp
+++ b/clang/test/Parser/lambda-misplaced-capture-default.cpp
@@ -36,3 +36,12 @@ template <typename... Args> void Test(Args... args) {
[... xs = &args, &] {}; // expected-error {{capture default must be first}}
}
} // namespace misplaced_capture_default_pack
+
+namespace GH163498 {
+struct S {
+ template <class T> S(T) {}
+};
+void t() {
+ S s{[a(42), &] {}}; // expected-error {{capture default must be first}}
+}
+}
diff --git a/clang/test/Profile/Inputs/c-counter-overflows.proftext b/clang/test/Profile/Inputs/c-counter-overflows.proftext
index 4d0287c78705..863306050701 100644
--- a/clang/test/Profile/Inputs/c-counter-overflows.proftext
+++ b/clang/test/Profile/Inputs/c-counter-overflows.proftext
@@ -1,5 +1,5 @@
main
-7779561829442898616
+862032801801816760
8
1
68719476720
diff --git a/clang/test/Profile/Inputs/c-general.profdata.v12 b/clang/test/Profile/Inputs/c-general.profdata.v12
new file mode 100644
index 000000000000..57a72faaecc8
--- /dev/null
+++ b/clang/test/Profile/Inputs/c-general.profdata.v12
Binary files differ
diff --git a/clang/test/Profile/Inputs/c-general.proftext b/clang/test/Profile/Inputs/c-general.proftext
index 08280ef39a89..72e1be6e8846 100644
--- a/clang/test/Profile/Inputs/c-general.proftext
+++ b/clang/test/Profile/Inputs/c-general.proftext
@@ -7,7 +7,7 @@ simple_loops
75
conditionals
-4904767535850050386
+293081517422662482
13
1
100
@@ -24,7 +24,7 @@ conditionals
1
early_exits
-2880354649761471549
+574511640547777597
9
1
0
@@ -37,7 +37,7 @@ early_exits
0
jumps
-15051420506203462683
+63440946314451995
22
1
1
@@ -86,7 +86,7 @@ switches
0
big_switch
-13144136522122330070
+461999971447013334
17
1
32
@@ -125,7 +125,7 @@ boolean_operators
33
boolop_loops
-12402604614320574815
+873389568252105055
13
1
50
@@ -149,7 +149,7 @@ conditional_operator
1
do_fallthrough
-8714614136504380050
+644163604256451218
4
1
10
diff --git a/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext b/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
index d880663fed32..7af509715f8f 100644
--- a/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
+++ b/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
@@ -1,5 +1,5 @@
never_called
-6820425066224770721
+1055817543190535841
9
0
0
@@ -17,7 +17,7 @@ main
1
dead_code
-5254464978620792806
+642778960193404902
10
1
0
diff --git a/clang/test/Profile/Inputs/cxx-rangefor.proftext b/clang/test/Profile/Inputs/cxx-rangefor.proftext
index d41205bbde14..cfc88da8f972 100644
--- a/clang/test/Profile/Inputs/cxx-rangefor.proftext
+++ b/clang/test/Profile/Inputs/cxx-rangefor.proftext
@@ -1,5 +1,5 @@
_Z9range_forv
-8789831523895825398
+719380991647896566
5
1
4
diff --git a/clang/test/Profile/Inputs/cxx-throws.proftext b/clang/test/Profile/Inputs/cxx-throws.proftext
index 043dea08c728..92b0eab39684 100644
--- a/clang/test/Profile/Inputs/cxx-throws.proftext
+++ b/clang/test/Profile/Inputs/cxx-throws.proftext
@@ -1,5 +1,5 @@
_Z6throwsv
-18172607911962830854
+878785342860126214
9
1
100
diff --git a/clang/test/Profile/Inputs/misexpect-switch-default.proftext b/clang/test/Profile/Inputs/misexpect-switch-default.proftext
index 533da9176523..112426e0c7b5 100644
--- a/clang/test/Profile/Inputs/misexpect-switch-default.proftext
+++ b/clang/test/Profile/Inputs/misexpect-switch-default.proftext
@@ -1,6 +1,6 @@
main
# Func Hash:
-8734802134600123338
+664351602352194506
# Num Counters:
9
# Counter Values:
diff --git a/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext b/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
index 0da9379357ae..99d067c57f16 100644
--- a/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
+++ b/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
@@ -1,6 +1,6 @@
main
# Func Hash:
-3721743393642630379
+262978879822089451
# Num Counters:
10
# Counter Values:
diff --git a/clang/test/Profile/c-collision.c b/clang/test/Profile/c-collision.c
index 6c779c6facaa..f35ba1bfb762 100644
--- a/clang/test/Profile/c-collision.c
+++ b/clang/test/Profile/c-collision.c
@@ -2,8 +2,8 @@
// RUN: %clang_cc1 -UEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-NOEXTRA
// RUN: %clang_cc1 -DEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-EXTRA
-// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 7156072912471487002,
-// CHECK-EXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 -4383447408116050035,
+// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 238543884830405146,
+// CHECK-EXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 228238610311337869,
extern int bar;
void foo(void) {
diff --git a/clang/test/Profile/c-general.c b/clang/test/Profile/c-general.c
index ee36a43dac08..6c865e608a03 100644
--- a/clang/test/Profile/c-general.c
+++ b/clang/test/Profile/c-general.c
@@ -4,6 +4,7 @@
// RUN: llvm-profdata merge %S/Inputs/c-general.proftext -o %t.profdata
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use=clang -fprofile-instrument-use-path=%t.profdata | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use=clang -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v12 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use=clang -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v5 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use=clang -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v3 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// Also check compatibility with older profiles.
diff --git a/clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c b/clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c
index 8d4e0c510603..443ccbbae66d 100644
--- a/clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c
+++ b/clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -Wpointer-arith -verify %s
#define __counted_by(f) __attribute__((counted_by(f)))
@@ -29,7 +29,9 @@ struct on_member_pointer_const_incomplete_ty {
};
struct on_member_pointer_void_ty {
- void* buf __counted_by(count); // expected-error{{'counted_by' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-note@+1{{use '__sized_by' to suppress this warning}}
+ void* buf __counted_by(count);
int count;
};
diff --git a/clang/test/Sema/attr-counted-by-or-null-last-field.c b/clang/test/Sema/attr-counted-by-or-null-last-field.c
index 60a1f571b19e..d0c50a733ace 100644
--- a/clang/test/Sema/attr-counted-by-or-null-last-field.c
+++ b/clang/test/Sema/attr-counted-by-or-null-last-field.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify=expected,immediate %s
-// RUN: %clang_cc1 -fsyntax-only -fexperimental-late-parse-attributes -verify=expected,late %s
+// RUN: %clang_cc1 -fsyntax-only -Wpointer-arith -verify=expected,immediate %s
+// RUN: %clang_cc1 -fsyntax-only -Wpointer-arith -fexperimental-late-parse-attributes -verify=expected,late %s
#define __counted_by_or_null(f) __attribute__((counted_by_or_null(f)))
@@ -128,7 +128,9 @@ struct on_member_ptr_incomplete_const_ty_ty_pos {
struct on_member_ptr_void_ty_ty_pos {
int count;
- void * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-note@+1{{use '__sized_by_or_null' to suppress this warning}}
+ void * ptr __counted_by_or_null(count);
};
typedef void(fn_ty)(int);
diff --git a/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c b/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c
index 2150c81f9e9b..233b729f87cc 100644
--- a/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c
+++ b/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -Wpointer-arith -verify %s
#define __counted_by_or_null(f) __attribute__((counted_by_or_null(f)))
#define __counted_by(f) __attribute__((counted_by(f)))
@@ -30,7 +30,9 @@ struct on_member_pointer_const_incomplete_ty {
};
struct on_member_pointer_void_ty {
- void* buf __counted_by_or_null(count); // expected-error{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-note@+1{{use '__sized_by_or_null' to suppress this warning}}
+ void* buf __counted_by_or_null(count);
int count;
};
diff --git a/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c
index 0bb09059c97f..0fd739ca7d4c 100644
--- a/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c
+++ b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -Wpointer-arith -verify %s
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -Wpointer-arith -verify %s
#define __counted_by_or_null(f) __attribute__((counted_by_or_null(f)))
#define __counted_by(f) __attribute__((counted_by(f)))
@@ -32,7 +32,8 @@ struct on_member_pointer_const_incomplete_ty {
struct on_member_pointer_void_ty {
int count;
- // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-note@+1{{use '__sized_by_or_null' to suppress this warning}}
void* buf __counted_by_or_null(count);
};
@@ -124,7 +125,8 @@ struct on_member_pointer_const_incomplete_ty_ty_pos {
struct on_member_pointer_void_ty_ty_pos {
int count;
- // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-note@+1{{use '__sized_by_or_null' to suppress this warning}}
void *__counted_by_or_null(count) buf;
};
diff --git a/clang/test/Sema/attr-counted-by-struct-ptrs.c b/clang/test/Sema/attr-counted-by-struct-ptrs.c
index c05d18262e2b..a42f3895695a 100644
--- a/clang/test/Sema/attr-counted-by-struct-ptrs.c
+++ b/clang/test/Sema/attr-counted-by-struct-ptrs.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fexperimental-late-parse-attributes %s -verify
+// RUN: %clang_cc1 -fsyntax-only -Wpointer-arith -verify %s
+// RUN: %clang_cc1 -fsyntax-only -Wpointer-arith -fexperimental-late-parse-attributes %s -verify
#define __counted_by(f) __attribute__((counted_by(f)))
@@ -31,7 +31,8 @@ struct on_member_pointer_const_incomplete_ty {
struct on_member_pointer_void_ty {
int count;
- // expected-error@+1{{'counted_by' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-note@+1{{use '__sized_by' to suppress this warning}}
void* buf __counted_by(count);
};
@@ -123,7 +124,8 @@ struct on_member_pointer_const_incomplete_ty_ty_pos {
struct on_member_pointer_void_ty_ty_pos {
int count;
- // expected-error@+1{{'counted_by' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+ // expected-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-note@+1{{use '__sized_by' to suppress this warning}}
void *__counted_by(count) buf;
};
diff --git a/clang/test/Sema/attr-counted-by-void-ptr-gnu.c b/clang/test/Sema/attr-counted-by-void-ptr-gnu.c
new file mode 100644
index 000000000000..c1ed5f84cf93
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-void-ptr-gnu.c
@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=expected-nowarn %s
+// RUN: %clang_cc1 -Wpointer-arith -fsyntax-only -verify=expected-warn %s
+// RUN: %clang_cc1 -fexperimental-bounds-safety -fsyntax-only -verify=expected-bounds %s
+
+// expected-nowarn-no-diagnostics
+// expected-bounds-no-diagnostics
+
+#define NULL (void*)0
+#define __counted_by(f) __attribute__((counted_by(f)))
+#define __counted_by_or_null(f) __attribute__((counted_by_or_null(f)))
+#define __sized_by(f) __attribute__((sized_by(f)))
+
+//==============================================================================
+// Test: counted_by on void* is allowed (warns with -Wpointer-arith)
+//==============================================================================
+
+struct test_void_ptr_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-warn-note@+1{{use '__sized_by' to suppress this warning}}
+ void* buf __counted_by(count);
+};
+
+struct test_const_void_ptr_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-warn-note@+1{{use '__sized_by' to suppress this warning}}
+ const void* buf __counted_by(count);
+};
+
+struct test_volatile_void_ptr_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-warn-note@+1{{use '__sized_by' to suppress this warning}}
+ volatile void* buf __counted_by(count);
+};
+
+struct test_const_volatile_void_ptr_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by' on a pointer to void is a GNU extension, treated as 'sized_by'}}
+ // expected-warn-note@+1{{use '__sized_by' to suppress this warning}}
+ const volatile void* buf __counted_by(count);
+};
+
+// Verify sized_by still works the same way (always allowed, no warning)
+struct test_sized_by_void_ptr {
+ int size;
+ void* buf __sized_by(size); // OK in both modes, no warning
+};
+
+//==============================================================================
+// Test: counted_by_or_null on void* behaves the same
+//==============================================================================
+
+struct test_void_ptr_or_null_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-warn-note@+1{{use '__sized_by_or_null' to suppress this warning}}
+ void* buf __counted_by_or_null(count);
+};
+
+struct test_const_void_ptr_or_null_gnu {
+ int count;
+ // expected-warn-warning@+2{{'counted_by_or_null' on a pointer to void is a GNU extension, treated as 'sized_by_or_null'}}
+ // expected-warn-note@+1{{use '__sized_by_or_null' to suppress this warning}}
+ const void* buf __counted_by_or_null(count);
+};
+
+//==============================================================================
+// Test: Using void* __counted_by(...) pointers (not just declaring them)
+//==============================================================================
+
+// Verify that void* __counted_by pointers can be used as rvalues, assigned to,
+// passed to functions, etc.
+
+void* use_as_rvalue(struct test_void_ptr_gnu* t) {
+ return t->buf;
+}
+
+void assign_to_pointer(struct test_void_ptr_gnu* t) {
+ t->buf = NULL;
+ t->count = 0;
+}
+
+extern void* my_allocator(unsigned long);
+
+void assign_from_allocator(struct test_void_ptr_gnu* t) {
+ t->buf = my_allocator(100);
+ t->count = 100;
+}
+
+void takes_void_ptr(void* p);
+
+void pass_to_function(struct test_void_ptr_gnu* t) {
+ takes_void_ptr(t->buf);
+}
+
+void* pointer_arithmetic(struct test_void_ptr_gnu* t) {
+ // expected-warn-warning@+1{{arithmetic on a pointer to void is a GNU extension}}
+ return t->buf + 10;
+}
diff --git a/clang/test/Sema/statements.c b/clang/test/Sema/statements.c
index d44ab5a65d5a..28740fa29576 100644
--- a/clang/test/Sema/statements.c
+++ b/clang/test/Sema/statements.c
@@ -119,14 +119,15 @@ void test_pr22849(void) {
};
}
-// GCC ignores empty statements at the end of compound expressions where the
-// result type is concerned.
+// Empty statements at the end of compound expressions have a result type 'void'.
void test13(void) {
int a;
a = ({ 1; });
- a = ({1;; });
+ a = ({ 1; 2; }); // expected-warning {{expression result unused}}
+ a = ({ 1;; }); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+ // expected-warning@-1 {{expression result unused}}
a = ({int x = 1; (void)x; }); // expected-error {{assigning to 'int' from incompatible type 'void'}}
- a = ({int x = 1; (void)x;; }); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+ a = ({int x = 1;; }); // expected-error {{assigning to 'int' from incompatible type 'void'}}
}
void test14(void) { return ({}); }
diff --git a/clang/test/SemaCUDA/error-includes-mode.cu b/clang/test/SemaCUDA/error-includes-mode.cu
index 257fdeceef65..f775e656b07a 100644
--- a/clang/test/SemaCUDA/error-includes-mode.cu
+++ b/clang/test/SemaCUDA/error-includes-mode.cu
@@ -1,7 +1,16 @@
// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck --check-prefix HOST %s
// RUN: not %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_35 \
// RUN: -fcuda-is-device -fsyntax-only %s 2>&1 | FileCheck --check-prefix SM35 %s
+// RUN: not %clang_cc1 -triple spirv64-unknown-unknown \
+// RUN: -fcuda-is-device -fsyntax-only %s 2>&1 | FileCheck --check-prefix SPIRV %s
+// RUN: not %clang_cc1 -triple spirv64-amd-amdhsa \
+// RUN: -fcuda-is-device -fsyntax-only %s 2>&1 | FileCheck --check-prefix AMDGCNSPIRV %s
+// RUN: not %clang_cc1 -triple spirv64-intel-unknown \
+// RUN: -fcuda-is-device -fsyntax-only %s 2>&1 | FileCheck --check-prefix INTELSPIRV %s
// HOST: 1 error generated when compiling for host
// SM35: 1 error generated when compiling for sm_35
+// SPIRV: 1 error generated when compiling for spirv64-unknown-unknown
+// AMDGCNSPIRV: 1 error generated when compiling for spirv64-amd-amdhsa
+// INTELSPIRV: 1 error generated when compiling for spirv64-intel-unknown
error;
diff --git a/clang/test/SemaCXX/attr-mode-tmpl.cpp b/clang/test/SemaCXX/attr-mode-tmpl.cpp
index f665b1ba4912..3a1da3b358af 100644
--- a/clang/test/SemaCXX/attr-mode-tmpl.cpp
+++ b/clang/test/SemaCXX/attr-mode-tmpl.cpp
@@ -45,7 +45,7 @@ void CheckMachineMode() {
// Check attributes on function parameters.
template <class T1, class T2>
-void CheckParameters(T1 __attribute__((mode(SI))) paramSI, // expected-note{{ignored: substitution failure}} expected-note-re{{not viable: no known conversion from '{{.*}}' (vector of 4 '{{.*}}' values) to 'EnumType' for 2nd argument}}
+void CheckParameters(T1 __attribute__((mode(SI))) paramSI, // expected-note{{ignored: substitution failure}} expected-note{{ignored: substitution failure [with T1 = int, T2 = int]: type of machine mode does not match type of base type}}
T1 __attribute__((mode(V4DI))) paramV4DI, // expected-warning{{deprecated}}
T2 __attribute__((mode(SF))) paramSF,
T2 __attribute__((mode(V4DF))) paramV4DF) { // expected-warning{{deprecated}}
diff --git a/clang/test/SemaCXX/cxx23-assume.cpp b/clang/test/SemaCXX/cxx23-assume.cpp
index ce862666aa48..a594a1a44337 100644
--- a/clang/test/SemaCXX/cxx23-assume.cpp
+++ b/clang/test/SemaCXX/cxx23-assume.cpp
@@ -108,7 +108,8 @@ constexpr bool f4() {
template <typename T>
concept C = f4<T>(); // expected-note 3 {{in instantiation of}}
// expected-note@-1 3 {{while substituting}}
- // expected-error@-2 2 {{resulted in a non-constant expression}}
+ // expected-error@-2 {{resulted in a non-constant expression}}
+ // expected-note@-3 {{because substituted constraint expression is ill-formed: substitution into constraint expression resulted in a non-constant expression}}
struct D {
int x;
@@ -130,13 +131,13 @@ constexpr int f5() requires C<T> { return 1; } // expected-note {{while checking
// expected-note@-1 {{candidate template ignored}}
template <typename T>
-constexpr int f5() requires (!C<T>) { return 2; } // expected-note 4 {{while checking the satisfaction}} \
- // expected-note 4 {{while substituting template arguments}} \
+constexpr int f5() requires (!C<T>) { return 2; } // expected-note 3 {{while checking the satisfaction}} \
+ // expected-note 3 {{while substituting template arguments}} \
// expected-note {{candidate template ignored}}
static_assert(f5<int>() == 1);
-static_assert(f5<D>() == 1); // expected-note 3 {{while checking constraint satisfaction}}
- // expected-note@-1 3 {{while substituting deduced template arguments}}
+static_assert(f5<D>() == 1); // expected-note 2 {{while checking constraint satisfaction}}
+ // expected-note@-1 2 {{while substituting deduced template arguments}}
// expected-error@-2 {{no matching function for call}}
static_assert(f5<double>() == 2);
@@ -170,7 +171,7 @@ foo (int x, int y)
// Do not crash when assumptions are unreachable.
namespace gh106898 {
-int foo () {
+int foo () {
while(1);
int a = 0, b = 1;
__attribute__((assume (a < b)));
diff --git a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp
index 331fe8387e1c..ff104243a973 100644
--- a/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp
+++ b/clang/test/SemaCXX/cxx2b-consteval-propagate.cpp
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -std=c++2a -Wno-unused-value %s -verify
+// RUN: %clang_cc1 -std=c++2a -Wno-unused-value %s -verify -fexperimental-new-constant-interpreter
// RUN: %clang_cc1 -std=c++2b -Wno-unused-value %s -verify
+// RUN: %clang_cc1 -std=c++2b -Wno-unused-value %s -verify -fexperimental-new-constant-interpreter
consteval int id(int i) { return i; }
constexpr char id(char c) { return c; }
diff --git a/clang/test/SemaCXX/cxx2b-warn-shadow.cpp b/clang/test/SemaCXX/cxx2b-warn-shadow.cpp
index 76866c426947..9ce0c5a7434f 100644
--- a/clang/test/SemaCXX/cxx2b-warn-shadow.cpp
+++ b/clang/test/SemaCXX/cxx2b-warn-shadow.cpp
@@ -11,3 +11,29 @@ struct Foo {
}
};
} // namespace GH95707
+
+namespace GH163731 {
+struct S1 {
+ int a;
+ void m(this S1 &self) {
+ auto lambda = [](int a) { return a; };
+ }
+};
+
+struct S2 {
+ int a;
+ void m(this S2 &self) {
+ int a = 1; // expected-note {{previous declaration is here}}
+ auto lambda = [](int a) { // expected-warning {{declaration shadows a local variable}}
+ return a;
+ };
+ }
+};
+
+struct S3 {
+ int a;
+ void m(this S3 &self) {
+ auto lambda = [self](int a) { return a + self.a; };
+ }
+};
+}
diff --git a/clang/test/SemaCXX/statements.cpp b/clang/test/SemaCXX/statements.cpp
index 48f178dd9a8b..426e9fa1e585 100644
--- a/clang/test/SemaCXX/statements.cpp
+++ b/clang/test/SemaCXX/statements.cpp
@@ -43,8 +43,6 @@ T test7(T v) {
return ({ // expected-warning{{use of GNU statement expression extension}}
T a = v;
a;
- ;
- ;
});
}
@@ -53,6 +51,21 @@ void test8() {
double b = test7(2.0);
}
+template <typename T>
+T test9(T v) {
+ return ({ // expected-warning {{use of GNU statement expression extension}}
+ T a = v;
+ a; // expected-warning {{expression result unused}}
+ ;
+ ;
+ });
+}
+
+void test10() {
+ int a = test9(1); // expected-note {{in instantiation of function template specialization 'test9<int>' requested here}}
+ // expected-error@-10 {{cannot initialize return object of type 'int' with an rvalue of type 'void'}}
+}
+
namespace GH48405 {
void foo() {
struct S {
diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
index f99e606fc656..1f70186c78ad 100644
--- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
@@ -12,7 +12,7 @@ int test_too_many_arg(int p0) {
double test_int_builtin(double p0) {
return firstbithigh(p0);
- // expected-error@-1 {{call to 'firstbithigh' is ambiguous}}
+ // expected-error@-1 {{no matching function for call to 'firstbithigh'}}
}
double2 test_int_builtin_2(double2 p0) {
diff --git a/clang/test/SemaTemplate/temp_arg_nontype.cpp b/clang/test/SemaTemplate/temp_arg_nontype.cpp
index 7d2a010295b4..bd0bf3cfdbc5 100644
--- a/clang/test/SemaTemplate/temp_arg_nontype.cpp
+++ b/clang/test/SemaTemplate/temp_arg_nontype.cpp
@@ -173,8 +173,7 @@ namespace pr6249 {
}
namespace PR6723 {
- template<unsigned char C> void f(int (&a)[C]); // expected-note 3{{candidate template ignored: substitution failure [with C = '\x00']}}
- // expected-note@-1 {{not viable: no known conversion from 'int[512]' to 'int (&)[0]'}}
+ template<unsigned char C> void f(int (&a)[C]); // expected-note 4{{candidate template ignored: substitution failure [with C = '\x00']}}
void g() {
int arr512[512];
f(arr512); // expected-error{{no matching function for call}}
diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp
index 5752cbac0291..45bdb4c623df 100644
--- a/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp
+++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp
@@ -43,7 +43,7 @@ void TempFunc() {}
void Useage() {
//expected-error@+2 {{no matching function}}
- //expected-note@-4 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'b'}}
+ //expected-note@-4 {{candidate template ignored: substitution failure [with a = 1, b = 4294967295, c = 1]: non-type template argument evaluates to -1, which cannot be narrowed to type 'unsigned int'}}
TempFunc<1, -1, 1>();
}
}
diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp
index c7879422cd7d..c86a1314ac02 100644
--- a/clang/tools/clang-repl/ClangRepl.cpp
+++ b/clang/tools/clang-repl/ClangRepl.cpp
@@ -309,6 +309,7 @@ int main(int argc, const char **argv) {
clang::Interpreter::JITConfig Config;
Config.IsOutOfProcess = !OOPExecutor.empty() || !OOPExecutorConnect.empty();
Config.OOPExecutor = OOPExecutor;
+ Config.OrcRuntimePath = OrcRuntimePath;
auto SizeOrErr = getSlabAllocSize(SlabAllocateSizeString);
if (!SizeOrErr) {
llvm::logAllUnhandledErrors(SizeOrErr.takeError(), llvm::errs(), "error: ");
diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp
index 0c051847f4d4..34af476843c0 100644
--- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp
+++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp
@@ -530,6 +530,7 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) {
p1 = p2;
p2 = p3;
p3 = temp;
+ POINT(in_loop);
}
POINT(after_loop);
}
@@ -543,7 +544,11 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) {
EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
EXPECT_THAT(Origin("p2"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
EXPECT_THAT(Origin("p3"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
- EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
+
+ EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "in_loop"));
+ // 'temp' is a block-local origin and it's loans are not tracked outside the
+ // block.
+ EXPECT_THAT(Origin("temp"), HasLoansTo({}, "after_loop"));
}
TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) {
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 43b21176962e..d578fa7a1a1e 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -589,20 +589,20 @@ TEST(ConfigParseTest, ParsesConfiguration) {
CHECK_PARSE("AlignTrailingComments: Leave", AlignTrailingComments,
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Leave, 0}));
+ {FormatStyle::TCAS_Leave, 0, true}));
CHECK_PARSE("AlignTrailingComments: Always", AlignTrailingComments,
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Always, 0}));
+ {FormatStyle::TCAS_Always, 0, true}));
CHECK_PARSE("AlignTrailingComments: Never", AlignTrailingComments,
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Never, 0}));
+ {FormatStyle::TCAS_Never, 0, true}));
// For backwards compatibility
CHECK_PARSE("AlignTrailingComments: true", AlignTrailingComments,
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Always, 0}));
+ {FormatStyle::TCAS_Always, 0, true}));
CHECK_PARSE("AlignTrailingComments: false", AlignTrailingComments,
FormatStyle::TrailingCommentsAlignmentStyle(
- {FormatStyle::TCAS_Never, 0}));
+ {FormatStyle::TCAS_Never, 0, true}));
CHECK_PARSE_NESTED_VALUE("Kind: Always", AlignTrailingComments, Kind,
FormatStyle::TCAS_Always);
CHECK_PARSE_NESTED_VALUE("Kind: Never", AlignTrailingComments, Kind,
@@ -611,6 +611,7 @@ TEST(ConfigParseTest, ParsesConfiguration) {
FormatStyle::TCAS_Leave);
CHECK_PARSE_NESTED_VALUE("OverEmptyLines: 1234", AlignTrailingComments,
OverEmptyLines, 1234u);
+ CHECK_PARSE_NESTED_BOOL(AlignTrailingComments, AlignPPAndNotPP);
Style.UseTab = FormatStyle::UT_ForIndentation;
CHECK_PARSE("UseTab: Never", UseTab, FormatStyle::UT_Never);
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index d7b225760548..684d3014fa7b 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -3378,6 +3378,66 @@ TEST_F(FormatTestComments, DontAlignOverScope) {
"int foobar; // group");
}
+TEST_F(FormatTestComments, DontAlignOverPPDirective) {
+ auto Style = getLLVMStyle();
+ Style.AlignTrailingComments.AlignPPAndNotPP = false;
+
+ verifyFormat("int i; // Aligned\n"
+ "int long; // with this\n"
+ "#define FOO // only aligned\n"
+ "#define LOOONG // with other pp directives\n"
+ "int loooong; // new alignment",
+ "int i;//Aligned\n"
+ "int long;//with this\n"
+ "#define FOO //only aligned\n"
+ "#define LOOONG //with other pp directives\n"
+ "int loooong; //new alignment",
+ Style);
+
+ verifyFormat("#define A // Comment\n"
+ "#define AB // Comment",
+ Style);
+
+ Style.ColumnLimit = 30;
+ verifyNoChange("#define A // Comment\n"
+ " // Continued\n"
+ "int i = 0; // New Stuff\n"
+ " // Continued\n"
+ "#define Func(X) \\\n"
+ " X(); \\\n"
+ " X(); // Comment\n"
+ " // Continued\n"
+ "long loong = 1; // Dont align",
+ Style);
+
+ verifyFormat("#define A // Comment that\n"
+ " // would wrap\n"
+ "#define FOO // For the\n"
+ " // alignment\n"
+ "#define B // Also\n"
+ " // aligned",
+ "#define A // Comment that would wrap\n"
+ "#define FOO // For the alignment\n"
+ "#define B // Also\n"
+ " // aligned",
+ Style);
+
+ Style.AlignTrailingComments.OverEmptyLines = 1;
+ verifyNoChange("#define A // Comment\n"
+ "\n"
+ " // Continued\n"
+ "int i = 0; // New Stuff\n"
+ "\n"
+ " // Continued\n"
+ "#define Func(X) \\\n"
+ " X(); \\\n"
+ " X(); // Comment\n"
+ "\n"
+ " // Continued\n"
+ "long loong = 1; // Dont align",
+ Style);
+}
+
TEST_F(FormatTestComments, AlignsBlockCommentDecorations) {
verifyFormat("/*\n"
" */",
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 80a52f791dfc..2c1f6f4140a9 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -344,7 +344,7 @@ conformance.</p>
<tr>
<td>static_assert without UB</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3525.htm">N3525</a></td>
- <td class="unknown" align="center">Unknown</td>
+ <td class="full" align="center">Yes</td>
</tr>
<tr>
<td>Allow calling static inline within extern inline</td>
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index a9e8899f8ae0..1ed4e66d5622 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -605,6 +605,10 @@ string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}
list(APPEND COMPILER_RT_COMMON_CFLAGS ${stdlib_flag})
list(APPEND COMPILER_RT_COMMON_LINK_FLAGS ${stdlib_flag})
+# Add assembler flags for execute-only code generation. C and C++ flags should have already
+# been added to CMAKE_C_FLAGS and CMAKE_CXX_FLAGS.
+append_string_if(RUNTIMES_EXECUTE_ONLY_CODE -DCOMPILER_RT_EXECUTE_ONLY_CODE CMAKE_ASM_FLAGS)
+
# TODO: There's a lot of duplication across lib/*/tests/CMakeLists.txt files,
# move some of the common flags to COMPILER_RT_UNITTEST_CFLAGS.
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 0496f240dc82..46d6bb5bd889 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -722,7 +722,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 10
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 6
diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
index 368cbaf108d3..2eddbf468c14 100644
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -71,9 +71,24 @@
#endif
+#if defined(__aarch64__) && defined(__ELF__) && \
+ defined(COMPILER_RT_EXECUTE_ONLY_CODE)
+// The assembler always creates an implicit '.text' section with default flags
+// (SHF_ALLOC | SHF_EXECINSTR), which is incompatible with the execute-only
+// '.text' section we want to create here because of the missing
+// SHF_AARCH64_PURECODE section flag. To solve this, we use 'unique,0' to
+// differentiate the two sections. The output will therefore have two separate
+// sections named '.text', where code will be placed into the execute-only
+// '.text' section, and the implicitly-created one will be empty.
+#define TEXT_SECTION \
+ .section .text,"axy",@progbits,unique,0
+#else
+#define TEXT_SECTION \
+ .text
+#endif
+
#if defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)
#define FUNC_ALIGN \
- .text SEPARATOR \
.balign 16 SEPARATOR
#else
#define FUNC_ALIGN
@@ -255,6 +270,7 @@
#endif
#define DEFINE_COMPILERRT_FUNCTION(name) \
+ TEXT_SECTION SEPARATOR \
DEFINE_CODE_STATE \
FILE_LEVEL_DIRECTIVE SEPARATOR \
.globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \
@@ -264,6 +280,7 @@
FUNC_SYMBOL(SYMBOL_NAME(name)):
#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \
+ TEXT_SECTION SEPARATOR \
DEFINE_CODE_STATE \
FILE_LEVEL_DIRECTIVE SEPARATOR \
.globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \
@@ -273,6 +290,7 @@
FUNC_SYMBOL(SYMBOL_NAME(name)):
#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ TEXT_SECTION SEPARATOR \
DEFINE_CODE_STATE \
FILE_LEVEL_DIRECTIVE SEPARATOR \
.globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \
@@ -282,6 +300,7 @@
FUNC_SYMBOL(SYMBOL_NAME(name)):
#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ TEXT_SECTION SEPARATOR \
DEFINE_CODE_STATE \
.globl FUNC_SYMBOL(name) SEPARATOR \
SYMBOL_IS_FUNC(name) SEPARATOR \
@@ -290,6 +309,7 @@
FUNC_SYMBOL(name):
#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \
+ TEXT_SECTION SEPARATOR \
DEFINE_CODE_STATE \
FUNC_ALIGN \
.globl FUNC_SYMBOL(name) SEPARATOR \
diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt
index 6db24610df1f..a57e2fe46245 100644
--- a/compiler-rt/lib/fuzzer/CMakeLists.txt
+++ b/compiler-rt/lib/fuzzer/CMakeLists.txt
@@ -162,6 +162,7 @@ if(OS_NAME MATCHES "Android|Linux|Fuchsia" AND
CFLAGS ${TARGET_CFLAGS}
CMAKE_ARGS -DCMAKE_CXX_COMPILER_WORKS=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
+ -DRUNTIMES_EXECUTE_ONLY_CODE=${RUNTIMES_EXECUTE_ONLY_CODE}
-DLIBCXXABI_ENABLE_EXCEPTIONS=OFF
-DLIBCXX_ABI_NAMESPACE=__Fuzzer
-DLIBCXX_ENABLE_EXCEPTIONS=OFF)
diff --git a/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S b/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S
index b8d98b09ada2..80d680017cfe 100644
--- a/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S
+++ b/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S
@@ -28,7 +28,7 @@
// stack pointer when compiling a C function.
// Hence we have to write this function in assembly.
-.section .text
+TEXT_SECTION
.file "hwasan_setjmp_aarch64.S"
.global ASM_WRAPPER_NAME(setjmp)
diff --git a/compiler-rt/lib/hwasan/hwasan_tag_mismatch_aarch64.S b/compiler-rt/lib/hwasan/hwasan_tag_mismatch_aarch64.S
index be82475101c8..1631d3257a26 100644
--- a/compiler-rt/lib/hwasan/hwasan_tag_mismatch_aarch64.S
+++ b/compiler-rt/lib/hwasan/hwasan_tag_mismatch_aarch64.S
@@ -70,7 +70,7 @@
// clobbering the x17 register in error reports, and that the program will have
// a runtime dependency on the __hwasan_tag_mismatch_v2 symbol therefore it will
// fail to start up given an older (i.e. incompatible) runtime.
-.section .text
+TEXT_SECTION
.file "hwasan_tag_mismatch_aarch64.S"
.global __hwasan_tag_mismatch
.type __hwasan_tag_mismatch, %function
diff --git a/compiler-rt/lib/msan/tests/CMakeLists.txt b/compiler-rt/lib/msan/tests/CMakeLists.txt
index a8500225337e..b4848a8d190d 100644
--- a/compiler-rt/lib/msan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/msan/tests/CMakeLists.txt
@@ -139,6 +139,7 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND
add_custom_libcxx(libcxx_msan_${arch} ${LIBCXX_PREFIX}
DEPS ${MSAN_RUNTIME_LIBRARIES}
CFLAGS ${MSAN_LIBCXX_CFLAGS} ${TARGET_CFLAGS}
+ CMAKE_ARGS -DRUNTIMES_EXECUTE_ONLY_CODE=${RUNTIMES_EXECUTE_ONLY_CODE}
USE_TOOLCHAIN)
set(MSAN_LIBCXX_DIR ${LIBCXX_PREFIX}/lib/)
diff --git a/compiler-rt/lib/orc/elfnix_tls.aarch64.S b/compiler-rt/lib/orc/elfnix_tls.aarch64.S
index 8dcdd535be8a..25d97e6593dc 100644
--- a/compiler-rt/lib/orc/elfnix_tls.aarch64.S
+++ b/compiler-rt/lib/orc/elfnix_tls.aarch64.S
@@ -13,9 +13,11 @@
// The content of this file is aarch64-only
#if defined(__arm64__) || defined(__aarch64__)
+#include "builtins/assembly.h"
+
#define REGISTER_SAVE_SPACE_SIZE 32 * 24
- .text
+ TEXT_SECTION
// returns address of TLV in x0, all other registers preserved
// TODO: add fast-path for repeat access
diff --git a/compiler-rt/lib/orc/sysv_reenter.arm64.S b/compiler-rt/lib/orc/sysv_reenter.arm64.S
index 74941c459d6a..61e58e50c97c 100644
--- a/compiler-rt/lib/orc/sysv_reenter.arm64.S
+++ b/compiler-rt/lib/orc/sysv_reenter.arm64.S
@@ -13,7 +13,9 @@
// The content of this file is arm64-only
#if defined(__arm64__) || defined(__aarch64__)
- .text
+#include "builtins/assembly.h"
+
+ TEXT_SECTION
// Saves GPRs, calls __orc_rt_resolve
.globl __orc_rt_sysv_reenter
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
index 5066953980af..c5c2180e0de9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
@@ -5,6 +5,7 @@
ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+TEXT_SECTION
.comm _ZN14__interception10real_vforkE,8,8
.globl ASM_WRAPPER_NAME(vfork)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
index a5ec85ae1646..72f4bbf212f9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -45,7 +45,6 @@ struct MemoryMappedSegmentData {
const char *current_load_cmd_addr;
u32 lc_type;
uptr base_virt_addr;
- uptr addr_mask;
};
template <typename Section>
@@ -54,12 +53,58 @@ static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
const Section *sc = (const Section *)data->current_load_cmd_addr;
data->current_load_cmd_addr += sizeof(Section);
- uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
+ uptr sec_start = sc->addr + data->base_virt_addr;
uptr sec_end = sec_start + sc->size;
module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
sc->sectname);
}
+static bool VerifyMemoryMapping(MemoryMappingLayout* mapping) {
+ InternalMmapVector<LoadedModule> modules;
+ modules.reserve(128); // matches DumpProcessMap
+ mapping->DumpListOfModules(&modules);
+
+ InternalMmapVector<LoadedModule::AddressRange> segments;
+ for (uptr i = 0; i < modules.size(); ++i) {
+ for (auto& range : modules[i].ranges()) {
+ segments.push_back(range);
+ }
+ }
+
+ // Verify that none of the segments overlap:
+ // 1. Sort the segments by the start address
+ // 2. Check that every segment starts after the previous one ends.
+ Sort(segments.data(), segments.size(),
+ [](LoadedModule::AddressRange& a, LoadedModule::AddressRange& b) {
+ return a.beg < b.beg;
+ });
+
+ // To avoid spam, we only print the report message once-per-process.
+ static bool invalid_module_map_reported = false;
+ bool well_formed = true;
+
+ for (size_t i = 1; i < segments.size(); i++) {
+ uptr cur_start = segments[i].beg;
+ uptr prev_end = segments[i - 1].end;
+ if (cur_start < prev_end) {
+ well_formed = false;
+ VReport(2, "Overlapping mappings: %s start = %p, %s end = %p\n",
+ segments[i].name, (void*)cur_start, segments[i - 1].name,
+ (void*)prev_end);
+ if (!invalid_module_map_reported) {
+ Report(
+ "WARN: Invalid dyld module map detected. This is most likely a bug "
+ "in the sanitizer.\n");
+ Report("WARN: Backtraces may be unreliable.\n");
+ invalid_module_map_reported = true;
+ }
+ }
+ }
+
+ mapping->Reset();
+ return well_formed;
+}
+
void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
// Don't iterate over sections when the caller hasn't set up the
// data pointer, when there are no sections, or when the segment
@@ -85,6 +130,7 @@ void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
Reset();
+ VerifyMemoryMapping(this);
}
MemoryMappingLayout::~MemoryMappingLayout() {
@@ -190,6 +236,7 @@ typedef struct dyld_shared_cache_dylib_text_info
extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
+extern intptr_t _dyld_get_image_slide(const struct mach_header* mh);
extern int dyld_shared_cache_iterate_text(
const uuid_t cacheUuid,
void (^callback)(const dyld_shared_cache_dylib_text_info *info));
@@ -258,23 +305,21 @@ static bool NextSegmentLoad(MemoryMappedSegment *segment,
layout_data->current_load_cmd_count--;
if (((const load_command *)lc)->cmd == kLCSegment) {
const SegmentCommand* sc = (const SegmentCommand *)lc;
- uptr base_virt_addr, addr_mask;
- if (layout_data->current_image == kDyldImageIdx) {
- base_virt_addr = (uptr)get_dyld_hdr();
- // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
- // it contains an absolute address rather than an offset for dyld.
- // To make matters even more complicated, this absolute address
- // isn't actually the absolute segment address, but the offset portion
- // of the address is accurate when combined with the dyld base address,
- // and the mask will give just this offset.
- addr_mask = 0xfffff;
- } else {
+ if (strncmp(sc->segname, "__LINKEDIT", sizeof("__LINKEDIT")) == 0) {
+ // The LINKEDIT sections are for internal linker use, and may alias
+ // with the LINKEDIT section for other modules. (If we included them,
+ // our memory map would contain overlappping sections.)
+ return false;
+ }
+
+ uptr base_virt_addr;
+ if (layout_data->current_image == kDyldImageIdx)
+ base_virt_addr = (uptr)_dyld_get_image_slide(get_dyld_hdr());
+ else
base_virt_addr =
(uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
- addr_mask = ~0;
- }
- segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
+ segment->start = sc->vmaddr + base_virt_addr;
segment->end = segment->start + sc->vmsize;
// Most callers don't need section information, so only fill this struct
// when required.
@@ -284,7 +329,6 @@ static bool NextSegmentLoad(MemoryMappedSegment *segment,
(const char *)lc + sizeof(SegmentCommand);
seg_data->lc_type = kLCSegment;
seg_data->base_virt_addr = base_virt_addr;
- seg_data->addr_mask = addr_mask;
internal_strncpy(seg_data->name, sc->segname,
ARRAY_SIZE(seg_data->name));
}
diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt
index 7928116879c0..3319855521bd 100644
--- a/compiler-rt/lib/tsan/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/CMakeLists.txt
@@ -30,6 +30,7 @@ if(COMPILER_RT_LIBCXX_PATH AND
add_custom_libcxx(libcxx_tsan_${arch} ${LIBCXX_PREFIX}
DEPS ${TSAN_RUNTIME_LIBRARIES}
CFLAGS ${TARGET_CFLAGS} -fsanitize=thread
+ CMAKE_ARGS -DRUNTIMES_EXECUTE_ONLY_CODE=${RUNTIMES_EXECUTE_ONLY_CODE}
USE_TOOLCHAIN)
list(APPEND libcxx_tsan_deps libcxx_tsan_${arch}-install-cmake326-workaround)
endforeach()
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
index f1d11a3e7f54..124bd59a91f0 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -4,10 +4,8 @@
#include "sanitizer_common/sanitizer_asm.h"
#include "builtins/assembly.h"
-#if !defined(__APPLE__)
-.section .text
-#else
-.section __TEXT,__text
+TEXT_SECTION
+#if defined(__APPLE__)
.align 3
#endif
diff --git a/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/compiler-rt/lib/xray/xray_trampoline_AArch64.S
index 2586def04cbb..5d951f3821a5 100644
--- a/compiler-rt/lib/xray/xray_trampoline_AArch64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_AArch64.S
@@ -37,7 +37,7 @@
#endif
.endm
-.text
+TEXT_SECTION
.p2align 2
.global ASM_SYMBOL(__xray_FunctionEntry)
ASM_HIDDEN(__xray_FunctionEntry)
diff --git a/compiler-rt/test/asan/TestCases/Darwin/asan-verify-module-map.cpp b/compiler-rt/test/asan/TestCases/Darwin/asan-verify-module-map.cpp
new file mode 100644
index 000000000000..7660841c7287
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/Darwin/asan-verify-module-map.cpp
@@ -0,0 +1,25 @@
+// This test simply checks that the "Invalid dyld module map" warning is not printed
+// in the output of a backtrace.
+
+// RUN: %clangxx_asan -DSHARED_LIB -g %s -dynamiclib -o %t.dylib
+// RUN: %clangxx_asan -O0 -g %s %t.dylib -o %t.executable
+// RUN: %env_asan_opts="print_module_map=2" not %run %t.executable 2>&1 | FileCheck %s -DDYLIB=%t.dylib
+
+// CHECK-NOT: WARN: Invalid dyld module map
+// CHECK-DAG: 0x{{.*}}-0x{{.*}} [[DYLIB]]
+// CHECK-DAG: 0x{{.*}}-0x{{.*}} {{.*}}libsystem
+
+#ifdef SHARED_LIB
+extern "C" void foo(int *a) { *a = 5; }
+#else
+# include <cstdlib>
+
+extern "C" void foo(int *a);
+
+int main() {
+ int *a = (int *)malloc(sizeof(int));
+ free(a);
+ foo(a);
+ return 0;
+}
+#endif \ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/log-path_test.cpp b/compiler-rt/test/asan/TestCases/log-path_test.cpp
index 6875d57c43cc..22f077fb5468 100644
--- a/compiler-rt/test/asan/TestCases/log-path_test.cpp
+++ b/compiler-rt/test/asan/TestCases/log-path_test.cpp
@@ -1,6 +1,5 @@
// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
-// XFAIL: android
-// UNSUPPORTED: ios
+// UNSUPPORTED: ios, android
//
// The for loop in the backticks below requires bash.
// REQUIRES: shell
diff --git a/compiler-rt/test/asan/TestCases/verbose-log-path_test.cpp b/compiler-rt/test/asan/TestCases/verbose-log-path_test.cpp
index 53166ccded39..f4781a7d4764 100644
--- a/compiler-rt/test/asan/TestCases/verbose-log-path_test.cpp
+++ b/compiler-rt/test/asan/TestCases/verbose-log-path_test.cpp
@@ -9,8 +9,8 @@
// RUN: FileCheck %s --check-prefix=CHECK-ERROR < %t-dir/asan.log.verbose-log-path_test-binary.*
// FIXME: only FreeBSD, NetBSD and Linux have verbose log paths now.
-// XFAIL: target={{.*windows-msvc.*}},android
-// UNSUPPORTED: ios
+// XFAIL: target={{.*windows-msvc.*}}
+// UNSUPPORTED: ios, android
#include <stdlib.h>
#include <string.h>
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp
index 6ba7025bf757..e4064828015a 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp
@@ -7,7 +7,7 @@
// RUN: not %run %t %t-out && FileCheck < %t-out %s
// REQUIRES: stable-runtime
-// XFAIL: android && asan
+// UNSUPPORTED: android && asan
#include <sanitizer/common_interface_defs.h>
#include <stdio.h>
diff --git a/flang-rt/include/flang-rt/runtime/io-stmt.h b/flang-rt/include/flang-rt/runtime/io-stmt.h
index f6a81f7cb812..3c6bcfec8d0c 100644
--- a/flang-rt/include/flang-rt/runtime/io-stmt.h
+++ b/flang-rt/include/flang-rt/runtime/io-stmt.h
@@ -730,8 +730,7 @@ public:
RT_API_ATTRS bool AdvanceRecord(int = 1);
RT_API_ATTRS int EndIoStatement();
RT_API_ATTRS bool CanAdvance() {
- return DIR == Direction::Input &&
- (canAdvance_ || this->mutableModes().inNamelist);
+ return canAdvance_ || this->mutableModes().inNamelist;
}
private:
diff --git a/flang-rt/lib/runtime/edit-output.cpp b/flang-rt/lib/runtime/edit-output.cpp
index f90b6fb10963..73dba35ff08d 100644
--- a/flang-rt/lib/runtime/edit-output.cpp
+++ b/flang-rt/lib/runtime/edit-output.cpp
@@ -175,9 +175,10 @@ bool RT_API_ATTRS EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
}
if (edit.IsListDirected()) {
int total{std::max(leadingSpaces, 1) + subTotal};
- if (io.GetConnectionState().NeedAdvance(static_cast<std::size_t>(total)) &&
- !io.AdvanceRecord()) {
- return false;
+ if (io.GetConnectionState().NeedAdvance(static_cast<std::size_t>(total))) {
+ if (!io.AdvanceRecord()) {
+ return false;
+ }
}
leadingSpaces = 1;
} else if (!edit.width) {
diff --git a/flang-rt/lib/runtime/io-stmt.cpp b/flang-rt/lib/runtime/io-stmt.cpp
index b958f23cf534..a88fbe605f89 100644
--- a/flang-rt/lib/runtime/io-stmt.cpp
+++ b/flang-rt/lib/runtime/io-stmt.cpp
@@ -1109,20 +1109,20 @@ ChildListIoStatementState<DIR>::ChildListIoStatementState(
ChildIo &child, const char *sourceFile, int sourceLine)
: ChildIoStatementState<DIR>{child, sourceFile, sourceLine} {
#if !defined(RT_DEVICE_AVOID_RECURSION)
- if constexpr (DIR == Direction::Input) {
- if (const auto *listInput{child.parent()
- .get_if<ListDirectedStatementState<Direction::Input>>()}) {
- this->set_eatComma(listInput->eatComma());
- this->namelistGroup_ = listInput->namelistGroup();
- if (auto *childListInput{child.parent()
- .get_if<ChildListIoStatementState<Direction::Input>>()}) {
- // Child list input whose parent is child list input: can advance
- // if the parent can.
- this->canAdvance_ = childListInput->CanAdvance();
- } else {
- // Child list input of top-level list input: can advance.
- this->canAdvance_ = true;
- }
+ if (const auto *listParent{
+ child.parent().get_if<ListDirectedStatementState<DIR>>()}) {
+ if constexpr (DIR == Direction::Input) {
+ this->set_eatComma(listParent->eatComma());
+ this->namelistGroup_ = listParent->namelistGroup();
+ }
+ if (auto *childListParent{
+ child.parent().get_if<ChildListIoStatementState<DIR>>()}) {
+ // Child list I/O whose parent is child list I/O: can advance
+ // if the parent can.
+ this->canAdvance_ = childListParent->CanAdvance();
+ } else {
+ // Child list I/O of top-level list I/O: can advance.
+ this->canAdvance_ = true;
}
}
#else
diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h
index 688d01704370..3eff90b95a20 100644
--- a/flang/include/flang/Lower/OpenMP/Clauses.h
+++ b/flang/include/flang/Lower/OpenMP/Clauses.h
@@ -204,6 +204,7 @@ using At = tomp::clause::AtT<TypeTy, IdTy, ExprTy>;
using Bind = tomp::clause::BindT<TypeTy, IdTy, ExprTy>;
using Capture = tomp::clause::CaptureT<TypeTy, IdTy, ExprTy>;
using Collapse = tomp::clause::CollapseT<TypeTy, IdTy, ExprTy>;
+using Collector = tomp::clause::CollectorT<TypeTy, IdTy, ExprTy>;
using Compare = tomp::clause::CompareT<TypeTy, IdTy, ExprTy>;
using Contains = tomp::clause::ContainsT<TypeTy, IdTy, ExprTy>;
using Copyin = tomp::clause::CopyinT<TypeTy, IdTy, ExprTy>;
@@ -239,6 +240,7 @@ using If = tomp::clause::IfT<TypeTy, IdTy, ExprTy>;
using Inbranch = tomp::clause::InbranchT<TypeTy, IdTy, ExprTy>;
using Inclusive = tomp::clause::InclusiveT<TypeTy, IdTy, ExprTy>;
using Indirect = tomp::clause::IndirectT<TypeTy, IdTy, ExprTy>;
+using Inductor = tomp::clause::InductorT<TypeTy, IdTy, ExprTy>;
using Init = tomp::clause::InitT<TypeTy, IdTy, ExprTy>;
using Initializer = tomp::clause::InitializerT<TypeTy, IdTy, ExprTy>;
using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/include/flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h b/flang/include/flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h
new file mode 100644
index 000000000000..c798681306c1
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h
@@ -0,0 +1,51 @@
+//===- FIROpenACCSupportAnalysis.h - FIR OpenACCSupport Analysis ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FIR-specific implementation of OpenACCSupport analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_ANALYSIS_FIROPENACCSUPPORTANALYSIS_H
+#define FORTRAN_OPTIMIZER_OPENACC_ANALYSIS_FIROPENACCSUPPORTANALYSIS_H
+
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Value.h"
+#include <string>
+
+namespace fir {
+namespace acc {
+
+/// FIR-specific implementation for the OpenACCSupport analysis interface.
+///
+/// This class provides the custom implementations of the OpenACCSupport
+/// interface methods that are tailored to FIR's requirements and
+/// can handle FIR dialect operations and types.
+/// Its primary intent is to be registered with the OpenACCSupport analysis
+/// using setImplementation()
+///
+/// Usage:
+/// auto &support = getAnalysis<mlir::acc::OpenACCSupport>();
+/// support.setImplementation(fir::acc::FIROpenACCSupportAnalysis());
+///
+class FIROpenACCSupportAnalysis {
+public:
+ FIROpenACCSupportAnalysis() = default;
+
+ std::string getVariableName(mlir::Value v);
+
+ std::string getRecipeName(mlir::acc::RecipeKind kind, mlir::Type type,
+ mlir::Value var);
+
+ mlir::InFlightDiagnostic emitNYI(mlir::Location loc,
+ const mlir::Twine &message);
+};
+
+} // namespace acc
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_ANALYSIS_FIROPENACCSUPPORTANALYSIS_H
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h
index 0627cc8ce4a6..c27c7ebc3b06 100644
--- a/flang/include/flang/Optimizer/OpenACC/Passes.h
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.h
@@ -13,6 +13,9 @@
#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES_H
#define FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"
@@ -25,6 +28,7 @@ namespace acc {
#define GEN_PASS_REGISTRATION
#include "flang/Optimizer/OpenACC/Passes.h.inc"
+std::unique_ptr<mlir::Pass> createACCInitializeFIRAnalysesPass();
std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass();
} // namespace acc
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td
index 3c127b30aa9b..d947aa470494 100644
--- a/flang/include/flang/Optimizer/OpenACC/Passes.td
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.td
@@ -11,6 +11,22 @@
include "mlir/Pass/PassBase.td"
+def ACCInitializeFIRAnalyses
+ : Pass<"acc-initialize-fir-analyses", "mlir::ModuleOp"> {
+ let summary = "Initialize FIR analyses for OpenACC passes";
+ let description = [{
+ This pass initializes analyses that can be used by subsequent OpenACC passes
+ in the pipeline. It creates and caches the OpenACCSupport analysis with a
+ FIR-specific implementation that can handle FIR types and operations.
+ It also initializes FIR's AliasAnalysis for use in OpenACC passes.
+ This pass needs to rerun if any analyses were invalidated by MLIR's framework.
+ }];
+ // In addition to pre-registering the needed analyses, this pass also
+ // pre-registers the dialects that various OpenACC passes may generate.
+ let dependentDialects = ["fir::FIROpsDialect", "hlfir::hlfirDialect",
+ "mlir::acc::OpenACCDialect"];
+}
+
def ACCRecipeBufferization
: Pass<"fir-acc-recipe-bufferization", "mlir::ModuleOp"> {
let summary = "Rewrite acc.*.recipe box values to ref<box> and update uses";
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
new file mode 100644
index 000000000000..5ca0925ea681
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
@@ -0,0 +1,57 @@
+//===- FIROpenACCUtils.h - FIR OpenACC Utilities ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares utility functions for FIR OpenACC support.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_SUPPORT_FIROPENACCUTILS_H
+#define FORTRAN_OPTIMIZER_OPENACC_SUPPORT_FIROPENACCUTILS_H
+
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Value.h"
+#include <string>
+
+namespace fir {
+namespace acc {
+
+/// Attempts to extract the variable name from a value by walking through
+/// FIR operations and looking for variable names.
+/// \param v The value to extract the variable name from
+/// \param preferDemangledName If true, prefers demangled/bindc names over
+/// mangled/unique names. If false, prefers mangled names.
+/// Returns empty string if no name is found.
+std::string getVariableName(mlir::Value v, bool preferDemangledName = true);
+
+/// Get the recipe name for a given recipe kind, FIR type, and optional
+/// variable. Uses FIR's type string representation with appropriate prefix. For
+/// firstprivate and reduction recipes, handles bounds suffix when all bounds
+/// are constant. For reduction recipes, embeds the operator name in the recipe.
+/// \param kind The recipe kind (private, firstprivate, or reduction)
+/// \param type The FIR type (must be a FIR type)
+/// \param var Optional variable value
+/// \param bounds Optional bounds for array sections (used for suffix
+/// generation)
+/// \param reductionOp Optional reduction operator (required for reduction
+/// recipes)
+/// \return The complete recipe name with all necessary suffixes
+std::string getRecipeName(mlir::acc::RecipeKind kind, mlir::Type type,
+ mlir::Value var = nullptr,
+ llvm::ArrayRef<mlir::Value> bounds = {},
+ mlir::acc::ReductionOperator reductionOp =
+ mlir::acc::ReductionOperator::AccNone);
+
+/// Check if all bounds are expressed with constant values.
+/// \param bounds Array of DataBoundsOp values to check
+/// \return true if all bounds have constant lowerbound/upperbound or extent
+bool areAllBoundsConstant(llvm::ArrayRef<mlir::Value> bounds);
+
+} // namespace acc
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_SUPPORT_FIROPENACCUTILS_H
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 839717d0833f..656fc5004487 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -379,8 +379,11 @@ bool IsInitialProcedureTarget(const semantics::Symbol &symbol) {
common::visitors{
[&](const semantics::SubprogramDetails &subp) {
return !subp.isDummy() && !subp.stmtFunction() &&
- symbol.owner().kind() != semantics::Scope::Kind::MainProgram &&
- symbol.owner().kind() != semantics::Scope::Kind::Subprogram;
+ ((symbol.owner().kind() !=
+ semantics::Scope::Kind::MainProgram &&
+ symbol.owner().kind() !=
+ semantics::Scope::Kind::Subprogram) ||
+ ultimate.attrs().test(semantics::Attr::EXTERNAL));
},
[](const semantics::SubprogramNameDetails &x) {
return x.kind() != semantics::SubprogramKind::Internal;
diff --git a/flang/lib/Evaluate/common.cpp b/flang/lib/Evaluate/common.cpp
index ed6a0ef93b0d..119ea3c5612a 100644
--- a/flang/lib/Evaluate/common.cpp
+++ b/flang/lib/Evaluate/common.cpp
@@ -16,25 +16,26 @@ namespace Fortran::evaluate {
void FoldingContext::RealFlagWarnings(
const RealFlags &flags, const char *operation) {
static constexpr auto warning{common::UsageWarning::FoldingException};
+ if (!realFlagWarningContext_.empty()) {
+ // Override 'operation' with a string like
+ // "compilation-time evaluation of a call to '...'"
+ operation = realFlagWarningContext_.c_str();
+ }
if (flags.test(RealFlag::Overflow)) {
- Warn(warning, "overflow on %s%s"_warn_en_US, operation,
- realFlagWarningContext_);
+ Warn(warning, "overflow on %s"_warn_en_US, operation);
}
if (flags.test(RealFlag::DivideByZero)) {
if (std::strcmp(operation, "division") == 0) {
- Warn(warning, "division by zero%s"_warn_en_US, realFlagWarningContext_);
+ Warn(warning, "division by zero"_warn_en_US);
} else {
- Warn(warning, "division by zero on %s%s"_warn_en_US, operation,
- realFlagWarningContext_);
+ Warn(warning, "division by zero on %s"_warn_en_US, operation);
}
}
if (flags.test(RealFlag::InvalidArgument)) {
- Warn(warning, "invalid argument on %s%s"_warn_en_US, operation,
- realFlagWarningContext_);
+ Warn(warning, "invalid argument on %s"_warn_en_US, operation);
}
if (flags.test(RealFlag::Underflow)) {
- Warn(warning, "underflow on %s%s"_warn_en_US, operation,
- realFlagWarningContext_);
+ Warn(warning, "underflow on %s"_warn_en_US, operation);
}
}
diff --git a/flang/lib/Evaluate/intrinsics-library.cpp b/flang/lib/Evaluate/intrinsics-library.cpp
index d8af5246fabd..54726ac539d6 100644
--- a/flang/lib/Evaluate/intrinsics-library.cpp
+++ b/flang/lib/Evaluate/intrinsics-library.cpp
@@ -1052,7 +1052,7 @@ std::optional<HostRuntimeWrapper> GetHostRuntimeWrapper(const std::string &name,
.value());
}
auto restorer{context.SetRealFlagWarningContext(
- " after folding a call to '"s + name + "'"s)};
+ "compilation-time evaluation of a call to '"s + name + "'"s)};
return Fold(context,
ConvertToType(
resultType, hostFolderWithChecks(context, std::move(args)))
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 548ca675db5e..f05c4cfccf7f 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -595,9 +595,15 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args,
// -cc1` does accept multiple action options, but will only consider the
// rightmost one.
if (args.hasMultipleArgs(clang::driver::options::OPT_Action_Group)) {
- const unsigned diagID = diags.getCustomDiagID(
- clang::DiagnosticsEngine::Error, "Only one action option is allowed");
- diags.Report(diagID);
+ llvm::SmallString<32> buf;
+ llvm::raw_svector_ostream os(buf);
+ for (const llvm::opt::Arg *arg :
+ args.filtered(clang::driver::options::OPT_Action_Group)) {
+ if (buf.size())
+ os << ", ";
+ os << "'" << arg->getSpelling() << "'";
+ }
+ diags.Report(clang::diag::err_drv_too_many_actions) << buf;
return false;
}
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 6208bed6d0ae..98a3aced3f52 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -28,6 +28,7 @@
#include "flang/Optimizer/Builder/IntrinsicCall.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h"
#include "flang/Parser/parse-tree-visitor.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Parser/tools.h"
@@ -1159,18 +1160,6 @@ bool isConstantBound(mlir::acc::DataBoundsOp &op) {
return false;
}
-/// Return true iff all the bounds are expressed with constant values.
-bool areAllBoundConstant(const llvm::SmallVector<mlir::Value> &bounds) {
- for (auto bound : bounds) {
- auto dataBound =
- mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
- assert(dataBound && "Must be DataBoundOp operation");
- if (!isConstantBound(dataBound))
- return false;
- }
- return true;
-}
-
static llvm::SmallVector<mlir::Value>
genConstantBounds(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::acc::DataBoundsOp &dataBound) {
@@ -1324,7 +1313,7 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe(
mlir::OpBuilder::InsertionGuard guard(builder);
auto recipe = genRecipeOp<mlir::acc::FirstprivateRecipeOp>(
builder, mod, recipeName, loc, ty);
- bool allConstantBound = areAllBoundConstant(bounds);
+ bool allConstantBound = fir::acc::areAllBoundsConstant(bounds);
auto [source, destination] = genRecipeCombinerOrCopyRegion(
builder, loc, ty, recipe.getCopyRegion(), bounds, allConstantBound);
@@ -1358,33 +1347,6 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe(
return recipe;
}
-/// Get a string representation of the bounds.
-std::string getBoundsString(llvm::SmallVector<mlir::Value> &bounds) {
- std::stringstream boundStr;
- if (!bounds.empty())
- boundStr << "_section_";
- llvm::interleave(
- bounds,
- [&](mlir::Value bound) {
- auto boundsOp =
- mlir::cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
- if (boundsOp.getLowerbound() &&
- fir::getIntIfConstant(boundsOp.getLowerbound()) &&
- boundsOp.getUpperbound() &&
- fir::getIntIfConstant(boundsOp.getUpperbound())) {
- boundStr << "lb" << *fir::getIntIfConstant(boundsOp.getLowerbound())
- << ".ub" << *fir::getIntIfConstant(boundsOp.getUpperbound());
- } else if (boundsOp.getExtent() &&
- fir::getIntIfConstant(boundsOp.getExtent())) {
- boundStr << "ext" << *fir::getIntIfConstant(boundsOp.getExtent());
- } else {
- boundStr << "?";
- }
- },
- [&] { boundStr << "x"; });
- return boundStr.str();
-}
-
/// Rebuild the array type from the acc.bounds operation with constant
/// lowerbound/upperbound or extent.
mlir::Type getTypeFromBounds(llvm::SmallVector<mlir::Value> &bounds,
@@ -1458,9 +1420,8 @@ static void genPrivatizationRecipes(
RecipeOp recipe;
mlir::Type retTy = getTypeFromBounds(bounds, info.addr.getType());
if constexpr (std::is_same_v<RecipeOp, mlir::acc::PrivateRecipeOp>) {
- std::string recipeName =
- fir::getTypeAsString(retTy, converter.getKindMap(),
- Fortran::lower::privatizationRecipePrefix);
+ std::string recipeName = fir::acc::getRecipeName(
+ mlir::acc::RecipeKind::private_recipe, retTy, info.addr, bounds);
recipe = Fortran::lower::createOrGetPrivateRecipe(builder, recipeName,
operandLocation, retTy);
auto op = createDataEntryOp<mlir::acc::PrivateOp>(
@@ -1474,10 +1435,8 @@ static void genPrivatizationRecipes(
symbolPairs->emplace_back(op.getAccVar(),
Fortran::semantics::SymbolRef(symbol));
} else {
- std::string suffix =
- areAllBoundConstant(bounds) ? getBoundsString(bounds) : "";
- std::string recipeName = fir::getTypeAsString(
- retTy, converter.getKindMap(), "firstprivatization" + suffix);
+ std::string recipeName = fir::acc::getRecipeName(
+ mlir::acc::RecipeKind::firstprivate_recipe, retTy, info.addr, bounds);
recipe = Fortran::lower::createOrGetFirstprivateRecipe(
builder, recipeName, operandLocation, retTy, bounds);
auto op = createDataEntryOp<mlir::acc::FirstprivateOp>(
@@ -1623,7 +1582,7 @@ mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe(
mlir::OpBuilder::InsertionGuard guard(builder);
auto recipe = genRecipeOp<mlir::acc::ReductionRecipeOp>(
builder, mod, recipeName, loc, ty, op);
- bool allConstantBound = areAllBoundConstant(bounds);
+ bool allConstantBound = fir::acc::areAllBoundsConstant(bounds);
auto [dest, src] = genRecipeCombinerOrCopyRegion(
builder, loc, ty, recipe.getCombinerRegion(), bounds, allConstantBound);
@@ -1708,15 +1667,12 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
mlir::acc::DataClause::acc_reduction, info.addr.getType(), async,
asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true);
mlir::Type ty = op.getAccVar().getType();
- if (!areAllBoundConstant(bounds) ||
+ if (!fir::acc::areAllBoundsConstant(bounds) ||
fir::isAssumedShape(info.addr.getType()) ||
fir::isAllocatableOrPointerArray(info.addr.getType()))
ty = info.addr.getType();
- std::string suffix =
- areAllBoundConstant(bounds) ? getBoundsString(bounds) : "";
- std::string recipeName = fir::getTypeAsString(
- ty, converter.getKindMap(),
- ("reduction_" + stringifyReductionOperator(mlirOp)).str() + suffix);
+ std::string recipeName = fir::acc::getRecipeName(
+ mlir::acc::RecipeKind::reduction_recipe, ty, info.addr, bounds, mlirOp);
mlir::acc::ReductionRecipeOp recipe =
Fortran::lower::createOrGetReductionRecipe(
@@ -1961,9 +1917,8 @@ static void privatizeIv(
}
if (privateOp == nullptr) {
- std::string recipeName =
- fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
- Fortran::lower::privatizationRecipePrefix);
+ std::string recipeName = fir::acc::getRecipeName(
+ mlir::acc::RecipeKind::private_recipe, ivValue.getType(), ivValue, {});
auto recipe = Fortran::lower::createOrGetPrivateRecipe(
builder, recipeName, loc, ivValue.getType());
@@ -2048,6 +2003,49 @@ static void determineDefaultLoopParMode(
}
}
+// Helper to visit Bounds of DO LOOP nest.
+static void visitLoopControl(
+ Fortran::lower::AbstractConverter &converter,
+ const Fortran::parser::DoConstruct &outerDoConstruct,
+ uint64_t loopsToProcess, Fortran::lower::pft::Evaluation &eval,
+ std::function<void(const Fortran::parser::LoopControl::Bounds &,
+ mlir::Location)>
+ callback) {
+ Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
+ for (uint64_t i = 0; i < loopsToProcess; ++i) {
+ const Fortran::parser::LoopControl *loopControl;
+ if (i == 0) {
+ loopControl = &*outerDoConstruct.GetLoopControl();
+ mlir::Location loc = converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct));
+ callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u),
+ loc);
+ } else {
+ // Safely locate the next inner DoConstruct within this eval.
+ const Fortran::parser::DoConstruct *innerDo = nullptr;
+ if (crtEval && crtEval->hasNestedEvaluations()) {
+ for (Fortran::lower::pft::Evaluation &child :
+ crtEval->getNestedEvaluations()) {
+ if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) {
+ innerDo = stmt;
+ // Prepare to descend for the next iteration
+ crtEval = &child;
+ break;
+ }
+ }
+ }
+ if (!innerDo)
+ break; // No deeper loop; stop collecting collapsed bounds.
+
+ loopControl = &*innerDo->GetLoopControl();
+ mlir::Location loc =
+ converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
+ callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u),
+ loc);
+ }
+ }
+}
+
// Extract loop bounds, steps, induction variables, and privatization info
// for both DO CONCURRENT and regular do loops
static void processDoLoopBounds(
@@ -2069,7 +2067,6 @@ static void processDoLoopBounds(
llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
assert(loopsToProcess > 0 && "expect at least one loop");
locs.push_back(currentLocation); // Location of the directive
- Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
if (isDoConcurrent) {
@@ -2110,57 +2107,29 @@ static void processDoLoopBounds(
inclusiveBounds.push_back(true);
}
} else {
- for (uint64_t i = 0; i < loopsToProcess; ++i) {
- const Fortran::parser::LoopControl *loopControl;
- if (i == 0) {
- loopControl = &*outerDoConstruct.GetLoopControl();
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(outerDoConstruct)));
- } else {
- // Safely locate the next inner DoConstruct within this eval.
- const Fortran::parser::DoConstruct *innerDo = nullptr;
- if (crtEval && crtEval->hasNestedEvaluations()) {
- for (Fortran::lower::pft::Evaluation &child :
- crtEval->getNestedEvaluations()) {
- if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) {
- innerDo = stmt;
- // Prepare to descend for the next iteration
- crtEval = &child;
- break;
- }
- }
- }
- if (!innerDo)
- break; // No deeper loop; stop collecting collapsed bounds.
-
- loopControl = &*innerDo->GetLoopControl();
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(*innerDo)));
- }
-
- const Fortran::parser::LoopControl::Bounds *bounds =
- std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
- assert(bounds && "Expected bounds on the loop construct");
- lowerbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
- upperbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
- if (bounds->step)
- steps.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
- else // If `step` is not present, assume it is `1`.
- steps.push_back(builder.createIntegerConstant(
- currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
-
- Fortran::semantics::Symbol &ivSym =
- bounds->name.thing.symbol->GetUltimate();
- privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
- privateOperands, ivPrivate, privatizationRecipes);
-
- inclusiveBounds.push_back(true);
-
- // crtEval already updated when descending; no blind increment here.
- }
+ visitLoopControl(
+ converter, outerDoConstruct, loopsToProcess, eval,
+ [&](const Fortran::parser::LoopControl::Bounds &bounds,
+ mlir::Location loc) {
+ locs.push_back(loc);
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds.lower), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds.upper), stmtCtx)));
+ if (bounds.step)
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds.step), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(),
+ 1));
+ Fortran::semantics::Symbol &ivSym =
+ bounds.name.thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizationRecipes);
+
+ inclusiveBounds.push_back(true);
+ });
}
}
@@ -2296,6 +2265,34 @@ static void remapDataOperandSymbols(
}
}
+static void privatizeInductionVariables(
+ Fortran::lower::AbstractConverter &converter,
+ mlir::Location currentLocation,
+ const Fortran::parser::DoConstruct &outerDoConstruct,
+ Fortran::lower::pft::Evaluation &eval,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ &ivPrivate,
+ llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
+ llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
+ // ivTypes and locs will be ignored since no acc.loop control arguments will
+ // be created.
+ llvm::SmallVector<mlir::Type> ivTypes;
+ llvm::SmallVector<mlir::Location> ivLocs;
+ assert(!outerDoConstruct.IsDoConcurrent() &&
+ "do concurrent loops are not expected to contained earlty exits");
+ visitLoopControl(converter, outerDoConstruct, loopsToProcess, eval,
+ [&](const Fortran::parser::LoopControl::Bounds &bounds,
+ mlir::Location loc) {
+ locs.push_back(loc);
+ Fortran::semantics::Symbol &ivSym =
+ bounds.name.thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes,
+ ivLocs, privateOperands, ivPrivate,
+ privatizationRecipes);
+ });
+}
+
static mlir::acc::LoopOp buildACCLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
@@ -2325,13 +2322,22 @@ static mlir::acc::LoopOp buildACCLoopOp(
llvm::SmallVector<mlir::Location> locs;
llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps;
- // Look at the do/do concurrent loops to extract bounds information.
- processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
- outerDoConstruct, eval, lowerbounds, upperbounds, steps,
- privateOperands, ivPrivate, privatizationRecipes, ivTypes,
- ivLocs, inclusiveBounds, locs, loopsToProcess);
-
- // Prepare the operand segment size attribute and the operands value range.
+ // Look at the do/do concurrent loops to extract bounds information unless
+ // this loop is lowered in an unstructured fashion, in which case bounds are
+ // not represented on acc.loop and explicit control flow is used inside body.
+ if (!eval.lowerAsUnstructured()) {
+ processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
+ outerDoConstruct, eval, lowerbounds, upperbounds, steps,
+ privateOperands, ivPrivate, privatizationRecipes,
+ ivTypes, ivLocs, inclusiveBounds, locs, loopsToProcess);
+ } else {
+ // When the loop contains early exits, privatize induction variables, but do
+ // not create acc.loop bounds. The control flow of the loop will be
+ // generated explicitly in the acc.loop body that is just a container.
+ privatizeInductionVariables(converter, currentLocation, outerDoConstruct,
+ eval, privateOperands, ivPrivate,
+ privatizationRecipes, locs, loopsToProcess);
+ }
llvm::SmallVector<mlir::Value> operands;
llvm::SmallVector<int32_t> operandSegments;
addOperands(operands, operandSegments, lowerbounds);
@@ -2360,20 +2366,36 @@ static mlir::acc::LoopOp buildACCLoopOp(
// Remap symbols from data clauses to use data operation results
remapDataOperandSymbols(converter, builder, loopOp, dataOperandSymbolPairs);
- for (auto [arg, iv] :
- llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
- ivPrivate)) {
- // Store block argument to the related iv private variable.
- mlir::Value privateValue =
- converter.getSymbolAddress(std::get<Fortran::semantics::SymbolRef>(iv));
- fir::StoreOp::create(builder, currentLocation, arg, privateValue);
+ if (!eval.lowerAsUnstructured()) {
+ for (auto [arg, iv] :
+ llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
+ ivPrivate)) {
+ // Store block argument to the related iv private variable.
+ mlir::Value privateValue = converter.getSymbolAddress(
+ std::get<Fortran::semantics::SymbolRef>(iv));
+ fir::StoreOp::create(builder, currentLocation, arg, privateValue);
+ }
+ loopOp.setInclusiveUpperbound(inclusiveBounds);
+ } else {
+ loopOp.setUnstructuredAttr(builder.getUnitAttr());
}
- loopOp.setInclusiveUpperbound(inclusiveBounds);
-
return loopOp;
}
+static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
+ bool hasReturnStmt = false;
+ for (auto &e : eval.getNestedEvaluations()) {
+ e.visit(Fortran::common::visitors{
+ [&](const Fortran::parser::ReturnStmt &) { hasReturnStmt = true; },
+ [&](const auto &s) {},
+ });
+ if (e.hasNestedEvaluations())
+ hasReturnStmt = hasEarlyReturn(e);
+ }
+ return hasReturnStmt;
+}
+
static mlir::acc::LoopOp createLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
@@ -2383,8 +2405,7 @@ static mlir::acc::LoopOp createLoopOp(
Fortran::lower::pft::Evaluation &eval,
const Fortran::parser::AccClauseList &accClauseList,
std::optional<mlir::acc::CombinedConstructsType> combinedConstructs =
- std::nullopt,
- bool needEarlyReturnHandling = false) {
+ std::nullopt) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
llvm::SmallVector<mlir::Value> tileOperands, privateOperands,
reductionOperands, cacheOperands, vectorOperands, workerNumOperands,
@@ -2560,7 +2581,10 @@ static mlir::acc::LoopOp createLoopOp(
llvm::SmallVector<mlir::Type> retTy;
mlir::Value yieldValue;
- if (needEarlyReturnHandling) {
+ if (eval.lowerAsUnstructured() && hasEarlyReturn(eval)) {
+ // When there is a return statement inside the loop, add a result to the
+ // acc.loop that will be used in a conditional branch after the loop to
+ // return.
mlir::Type i1Ty = builder.getI1Type();
yieldValue = builder.createIntegerConstant(currentLocation, i1Ty, 0);
retTy.push_back(i1Ty);
@@ -2641,19 +2665,6 @@ static mlir::acc::LoopOp createLoopOp(
return loopOp;
}
-static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
- bool hasReturnStmt = false;
- for (auto &e : eval.getNestedEvaluations()) {
- e.visit(Fortran::common::visitors{
- [&](const Fortran::parser::ReturnStmt &) { hasReturnStmt = true; },
- [&](const auto &s) {},
- });
- if (e.hasNestedEvaluations())
- hasReturnStmt = hasEarlyReturn(e);
- }
- return hasReturnStmt;
-}
-
static mlir::Value
genACC(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
@@ -2667,17 +2678,6 @@ genACC(Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation =
converter.genLocation(beginLoopDirective.source);
- bool needEarlyExitHandling = false;
- if (eval.lowerAsUnstructured()) {
- needEarlyExitHandling = hasEarlyReturn(eval);
- // If the loop is lowered in an unstructured fashion, lowering generates
- // explicit control flow that duplicates the looping semantics of the
- // loops.
- if (!needEarlyExitHandling)
- TODO(currentLocation,
- "loop with early exit inside OpenACC loop construct");
- }
-
Fortran::lower::StatementContext stmtCtx;
assert(loopDirective.v == llvm::acc::ACCD_loop &&
@@ -2690,8 +2690,8 @@ genACC(Fortran::lower::AbstractConverter &converter,
std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
auto loopOp = createLoopOp(converter, currentLocation, semanticsContext,
stmtCtx, *outerDoConstruct, eval, accClauseList,
- /*combinedConstructs=*/{}, needEarlyExitHandling);
- if (needEarlyExitHandling)
+ /*combinedConstructs=*/{});
+ if (loopOp.getNumResults() == 1)
return loopOp.getResult(0);
return mlir::Value{};
@@ -3476,10 +3476,6 @@ genACC(Fortran::lower::AbstractConverter &converter,
converter.genLocation(beginCombinedDirective.source);
Fortran::lower::StatementContext stmtCtx;
- if (eval.lowerAsUnstructured())
- TODO(currentLocation,
- "loop with early exit inside OpenACC combined construct");
-
if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
createComputeOp<mlir::acc::KernelsOp>(
converter, currentLocation, eval, semanticsContext, stmtCtx,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 0f60b4799100..2575b7081039 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -249,8 +249,10 @@ MAKE_EMPTY_CLASS(Groupprivate, Groupprivate);
MAKE_INCOMPLETE_CLASS(AdjustArgs, AdjustArgs);
MAKE_INCOMPLETE_CLASS(AppendArgs, AppendArgs);
+MAKE_INCOMPLETE_CLASS(Collector, Collector);
MAKE_INCOMPLETE_CLASS(GraphId, GraphId);
MAKE_INCOMPLETE_CLASS(GraphReset, GraphReset);
+MAKE_INCOMPLETE_CLASS(Inductor, Inductor);
MAKE_INCOMPLETE_CLASS(Replayable, Replayable);
MAKE_INCOMPLETE_CLASS(Transparent, Transparent);
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 4c0d26642863..18b56d384b47 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1309,34 +1309,22 @@ CUDAIntrinsicLibrary::genThisWarp(mlir::Type resultType,
// THREADFENCE
void CUDAIntrinsicLibrary::genThreadFence(
llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.gl";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
+ assert(args.size() == 0);
+ mlir::NVVM::MembarOp::create(builder, loc, mlir::NVVM::MemScopeKind::GPU);
}
// THREADFENCE_BLOCK
void CUDAIntrinsicLibrary::genThreadFenceBlock(
llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.cta";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
+ assert(args.size() == 0);
+ mlir::NVVM::MembarOp::create(builder, loc, mlir::NVVM::MemScopeKind::CTA);
}
// THREADFENCE_SYSTEM
void CUDAIntrinsicLibrary::genThreadFenceSystem(
llvm::ArrayRef<fir::ExtendedValue> args) {
- constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.sys";
- mlir::FunctionType funcType =
- mlir::FunctionType::get(builder.getContext(), {}, {});
- auto funcOp = builder.createFunction(loc, funcName, funcType);
- llvm::SmallVector<mlir::Value> noArgs;
- fir::CallOp::create(builder, loc, funcOp, noArgs);
+ assert(args.size() == 0);
+ mlir::NVVM::MembarOp::create(builder, loc, mlir::NVVM::MemScopeKind::SYS);
}
// TMA_BULK_COMMIT_GROUP
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 7b69b7d428a8..793be32400db 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -18,7 +18,6 @@
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
-#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/TypeSwitch.h"
diff --git a/flang/lib/Optimizer/OpenACC/Analysis/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Analysis/CMakeLists.txt
new file mode 100644
index 000000000000..e05d1456e6db
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Analysis/CMakeLists.txt
@@ -0,0 +1,22 @@
+add_flang_library(FIROpenACCAnalysis
+ FIROpenACCSupportAnalysis.cpp
+
+ DEPENDS
+ FIRAnalysis
+ FIRDialect
+ FIROpenACCSupport
+ HLFIRDialect
+
+ LINK_LIBS
+ FIRAnalysis
+ FIRDialect
+ FIROpenACCSupport
+ HLFIRDialect
+
+ MLIR_DEPS
+ MLIROpenACCDialect
+
+ MLIR_LIBS
+ MLIROpenACCDialect
+)
+
diff --git a/flang/lib/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.cpp b/flang/lib/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.cpp
new file mode 100644
index 000000000000..8cdbe1d5b170
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.cpp
@@ -0,0 +1,40 @@
+//===- FIROpenACCSupportAnalysis.cpp - FIR OpenACCSupport Analysis -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FIR-specific OpenACCSupport analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h"
+
+using namespace mlir;
+
+namespace fir {
+namespace acc {
+
+std::string FIROpenACCSupportAnalysis::getVariableName(Value v) {
+ return fir::acc::getVariableName(v, /*preferDemangledName=*/true);
+}
+
+std::string FIROpenACCSupportAnalysis::getRecipeName(mlir::acc::RecipeKind kind,
+ Type type, Value var) {
+ return fir::acc::getRecipeName(kind, type, var);
+}
+
+mlir::InFlightDiagnostic
+FIROpenACCSupportAnalysis::emitNYI(Location loc, const Twine &message) {
+ TODO(loc, message);
+ // Should be unreachable, but we return an actual diagnostic
+ // to satisfy the interface.
+ return mlir::emitError(loc, "not yet implemented: " + message.str());
+}
+
+} // namespace acc
+} // namespace fir
diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
index 790b9fdb1589..16a40254dbfe 100644
--- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
@@ -1,2 +1,3 @@
+add_subdirectory(Analysis)
add_subdirectory(Support)
add_subdirectory(Transforms)
diff --git a/flang/lib/Optimizer/OpenACC/Support/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Support/CMakeLists.txt
index 898fb00d41df..9c6f0ee74f4c 100644
--- a/flang/lib/Optimizer/OpenACC/Support/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/Support/CMakeLists.txt
@@ -4,6 +4,7 @@ add_flang_library(FIROpenACCSupport
FIROpenACCAttributes.cpp
FIROpenACCOpsInterfaces.cpp
FIROpenACCTypeInterfaces.cpp
+ FIROpenACCUtils.cpp
RegisterOpenACCExtensions.cpp
DEPENDS
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
new file mode 100644
index 000000000000..e5b8123305c6
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
@@ -0,0 +1,269 @@
+//===- FIROpenACCUtils.cpp - FIR OpenACC Utilities ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utility functions for FIR OpenACC support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+
+namespace fir {
+namespace acc {
+
+std::string getVariableName(Value v, bool preferDemangledName) {
+ std::string srcName;
+ std::string prefix;
+ llvm::SmallVector<std::string, 4> arrayIndices;
+ bool iterate = true;
+ mlir::Operation *defOp;
+
+ // For integer constants, no need to further iterate - print their value
+ // immediately.
+ if (v.getDefiningOp()) {
+ IntegerAttr::ValueType val;
+ if (matchPattern(v.getDefiningOp(), m_ConstantInt(&val))) {
+ llvm::raw_string_ostream os(prefix);
+ val.print(os, /*isSigned=*/true);
+ return prefix;
+ }
+ }
+
+ while (v && (defOp = v.getDefiningOp()) && iterate) {
+ iterate =
+ llvm::TypeSwitch<mlir::Operation *, bool>(defOp)
+ .Case<mlir::ViewLikeOpInterface>(
+ [&v](mlir::ViewLikeOpInterface op) {
+ v = op.getViewSource();
+ return true;
+ })
+ .Case<fir::ReboxOp>([&v](fir::ReboxOp op) {
+ v = op.getBox();
+ return true;
+ })
+ .Case<fir::EmboxOp>([&v](fir::EmboxOp op) {
+ v = op.getMemref();
+ return true;
+ })
+ .Case<fir::ConvertOp>([&v](fir::ConvertOp op) {
+ v = op.getValue();
+ return true;
+ })
+ .Case<fir::LoadOp>([&v](fir::LoadOp op) {
+ v = op.getMemref();
+ return true;
+ })
+ .Case<fir::BoxAddrOp>([&v](fir::BoxAddrOp op) {
+ // The box holds the name of the variable.
+ v = op.getVal();
+ return true;
+ })
+ .Case<fir::AddrOfOp>([&](fir::AddrOfOp op) {
+ // Only use address_of symbol if mangled name is preferred
+ if (!preferDemangledName) {
+ auto symRef = op.getSymbol();
+ srcName = symRef.getLeafReference().getValue().str();
+ }
+ return false;
+ })
+ .Case<fir::ArrayCoorOp>([&](fir::ArrayCoorOp op) {
+ v = op.getMemref();
+ for (auto coor : op.getIndices()) {
+ auto idxName = getVariableName(coor, preferDemangledName);
+ arrayIndices.push_back(idxName.empty() ? "?" : idxName);
+ }
+ return true;
+ })
+ .Case<fir::CoordinateOp>([&](fir::CoordinateOp op) {
+ std::optional<llvm::ArrayRef<int32_t>> fieldIndices =
+ op.getFieldIndices();
+ if (fieldIndices && fieldIndices->size() > 0 &&
+ (*fieldIndices)[0] != fir::CoordinateOp::kDynamicIndex) {
+ int fieldId = (*fieldIndices)[0];
+ mlir::Type baseType =
+ fir::getFortranElementType(op.getRef().getType());
+ if (auto recType = llvm::dyn_cast<fir::RecordType>(baseType)) {
+ srcName = recType.getTypeList()[fieldId].first;
+ }
+ }
+ if (!srcName.empty()) {
+ // If the field name is known - attempt to continue building
+ // name by looking at its parents.
+ prefix =
+ getVariableName(op.getRef(), preferDemangledName) + "%";
+ }
+ return false;
+ })
+ .Case<hlfir::DesignateOp>([&](hlfir::DesignateOp op) {
+ if (op.getComponent()) {
+ srcName = op.getComponent().value().str();
+ prefix =
+ getVariableName(op.getMemref(), preferDemangledName) + "%";
+ return false;
+ }
+ for (auto coor : op.getIndices()) {
+ auto idxName = getVariableName(coor, preferDemangledName);
+ arrayIndices.push_back(idxName.empty() ? "?" : idxName);
+ }
+ v = op.getMemref();
+ return true;
+ })
+ .Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
+ srcName = op.getUniqName().str();
+ return false;
+ })
+ .Case<fir::AllocaOp>([&](fir::AllocaOp op) {
+ if (preferDemangledName) {
+ // Prefer demangled name (bindc_name over uniq_name)
+ srcName = op.getBindcName() ? *op.getBindcName()
+ : op.getUniqName() ? *op.getUniqName()
+ : "";
+ } else {
+ // Prefer mangled name (uniq_name over bindc_name)
+ srcName = op.getUniqName() ? *op.getUniqName()
+ : op.getBindcName() ? *op.getBindcName()
+ : "";
+ }
+ return false;
+ })
+ .Default([](mlir::Operation *) { return false; });
+ }
+
+ // Fallback to the default implementation.
+ if (srcName.empty())
+ return acc::getVariableName(v);
+
+ // Build array index suffix if present
+ std::string suffix;
+ if (!arrayIndices.empty()) {
+ llvm::raw_string_ostream os(suffix);
+ os << "(";
+ llvm::interleaveComma(arrayIndices, os);
+ os << ")";
+ }
+
+ // Names from FIR operations may be mangled.
+ // When the demangled name is requested - demangle it.
+ if (preferDemangledName) {
+ auto [kind, deconstructed] = fir::NameUniquer::deconstruct(srcName);
+ if (kind != fir::NameUniquer::NameKind::NOT_UNIQUED)
+ return prefix + deconstructed.name + suffix;
+ }
+
+ return prefix + srcName + suffix;
+}
+
+bool areAllBoundsConstant(llvm::ArrayRef<Value> bounds) {
+ for (auto bound : bounds) {
+ auto dataBound =
+ mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
+ if (!dataBound)
+ return false;
+
+ // Check if this bound has constant values
+ bool hasConstant = false;
+ if (dataBound.getLowerbound() && dataBound.getUpperbound())
+ hasConstant =
+ fir::getIntIfConstant(dataBound.getLowerbound()).has_value() &&
+ fir::getIntIfConstant(dataBound.getUpperbound()).has_value();
+ else if (dataBound.getExtent())
+ hasConstant = fir::getIntIfConstant(dataBound.getExtent()).has_value();
+
+ if (!hasConstant)
+ return false;
+ }
+ return true;
+}
+
+static std::string getBoundsString(llvm::ArrayRef<Value> bounds) {
+ if (bounds.empty())
+ return "";
+
+ std::string boundStr;
+ llvm::raw_string_ostream os(boundStr);
+ os << "_section_";
+
+ llvm::interleave(
+ bounds,
+ [&](Value bound) {
+ auto boundsOp =
+ mlir::cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
+ if (boundsOp.getLowerbound() &&
+ fir::getIntIfConstant(boundsOp.getLowerbound()) &&
+ boundsOp.getUpperbound() &&
+ fir::getIntIfConstant(boundsOp.getUpperbound())) {
+ os << "lb" << *fir::getIntIfConstant(boundsOp.getLowerbound())
+ << ".ub" << *fir::getIntIfConstant(boundsOp.getUpperbound());
+ } else if (boundsOp.getExtent() &&
+ fir::getIntIfConstant(boundsOp.getExtent())) {
+ os << "ext" << *fir::getIntIfConstant(boundsOp.getExtent());
+ } else {
+ os << "?";
+ }
+ },
+ [&] { os << "x"; });
+
+ return os.str();
+}
+
+std::string getRecipeName(mlir::acc::RecipeKind kind, Type type, Value var,
+ llvm::ArrayRef<Value> bounds,
+ mlir::acc::ReductionOperator reductionOp) {
+ assert(fir::isa_fir_type(type) && "getRecipeName expects a FIR type");
+
+ // Build the complete prefix with all components before calling
+ // getTypeAsString
+ std::string prefixStr;
+ llvm::raw_string_ostream prefixOS(prefixStr);
+
+ switch (kind) {
+ case mlir::acc::RecipeKind::private_recipe:
+ prefixOS << "privatization";
+ // Private recipes do not currently include bounds in the name
+ // TODO: They should include them - but lowering tests would need to
+ // be updated.
+ break;
+ case mlir::acc::RecipeKind::firstprivate_recipe:
+ prefixOS << "firstprivatization";
+ // Add bounds to the prefix if applicable (only for firstprivate)
+ if (!bounds.empty() && areAllBoundsConstant(bounds))
+ prefixOS << getBoundsString(bounds);
+ break;
+ case mlir::acc::RecipeKind::reduction_recipe:
+ prefixOS << "reduction";
+ // Embed the reduction operator in the prefix
+ if (reductionOp != mlir::acc::ReductionOperator::AccNone)
+ prefixOS << "_"
+ << mlir::acc::stringifyReductionOperator(reductionOp).str();
+ // Add bounds to the prefix if applicable (only for reduction)
+ if (!bounds.empty() && areAllBoundsConstant(bounds))
+ prefixOS << getBoundsString(bounds);
+ break;
+ }
+
+ auto kindMap = var && var.getDefiningOp()
+ ? fir::getKindMapping(var.getDefiningOp())
+ : fir::KindMapping(type.getContext());
+ return fir::getTypeAsString(type, kindMap, prefixOS.str());
+}
+
+} // namespace acc
+} // namespace fir
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCInitializeFIRAnalyses.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCInitializeFIRAnalyses.cpp
new file mode 100644
index 000000000000..679b29bb462b
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCInitializeFIRAnalyses.cpp
@@ -0,0 +1,56 @@
+//===- ACCInitializeFIRAnalyses.cpp - Initialize FIR analyses ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass initializes analyses that can be reused by subsequent OpenACC
+// passes in the pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/OpenACC/Analysis/FIROpenACCSupportAnalysis.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "mlir/Analysis/AliasAnalysis.h"
+#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
+
+namespace fir {
+namespace acc {
+#define GEN_PASS_DEF_ACCINITIALIZEFIRANALYSES
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace acc
+} // namespace fir
+
+#define DEBUG_TYPE "acc-initialize-fir-analyses"
+
+namespace {
+
+/// This pass initializes analyses for reuse by subsequent OpenACC passes in the
+/// pipeline. It creates and caches analyses like OpenACCSupport so they can be
+/// retrieved by later passes using getAnalysis() or getCachedAnalysis().
+class ACCInitializeFIRAnalysesPass
+ : public fir::acc::impl::ACCInitializeFIRAnalysesBase<
+ ACCInitializeFIRAnalysesPass> {
+public:
+ void runOnOperation() override {
+ // Initialize OpenACCSupport with FIR-specific implementation.
+ auto &openACCSupport = getAnalysis<mlir::acc::OpenACCSupport>();
+ openACCSupport.setImplementation(fir::acc::FIROpenACCSupportAnalysis());
+
+ // Initialize AliasAnalysis with FIR-specific implementation.
+ auto &aliasAnalysis = getAnalysis<mlir::AliasAnalysis>();
+ aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis());
+
+ // Mark all analyses as preserved since this pass only initializes them
+ markAllAnalysesPreserved();
+ }
+};
+
+} // namespace
+
+std::unique_ptr<mlir::Pass> fir::acc::createACCInitializeFIRAnalysesPass() {
+ return std::make_unique<ACCInitializeFIRAnalysesPass>();
+}
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
index ed177baf52be..35aa87d6f1c8 100644
--- a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -1,11 +1,15 @@
add_flang_library(FIROpenACCTransforms
+ ACCInitializeFIRAnalyses.cpp
ACCRecipeBufferization.cpp
DEPENDS
FIROpenACCPassesIncGen
LINK_LIBS
+ FIRAnalysis
FIRDialect
+ FIROpenACCAnalysis
+ HLFIRDialect
MLIR_LIBS
MLIRIR
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index bd07d7fe01b8..d60da8971efd 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -477,58 +477,6 @@ class MapInfoFinalizationPass
return false;
}
- mlir::omp::MapInfoOp genBoxcharMemberMap(mlir::omp::MapInfoOp op,
- fir::FirOpBuilder &builder) {
- if (!op.getMembers().empty())
- return op;
- mlir::Location loc = op.getVarPtr().getLoc();
- mlir::Value boxChar = op.getVarPtr();
-
- if (mlir::isa<fir::ReferenceType>(op.getVarPtr().getType()))
- boxChar = fir::LoadOp::create(builder, loc, op.getVarPtr());
-
- fir::BoxCharType boxCharType =
- mlir::dyn_cast<fir::BoxCharType>(boxChar.getType());
- mlir::Value boxAddr = fir::BoxOffsetOp::create(
- builder, loc, op.getVarPtr(), fir::BoxFieldAttr::base_addr);
-
- mlir::ArrayAttr newMembersAttr;
- llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
- newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
-
- mlir::Value varPtr = op.getVarPtr();
- mlir::omp::MapInfoOp memberMapInfoOp = mlir::omp::MapInfoOp::create(
- builder, op.getLoc(), varPtr.getType(), varPtr,
- mlir::TypeAttr::get(boxCharType.getEleTy()),
- builder.getAttr<mlir::omp::ClauseMapFlagsAttr>(
- mlir::omp::ClauseMapFlags::to |
- mlir::omp::ClauseMapFlags::implicit),
- builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
- mlir::omp::VariableCaptureKind::ByRef),
- /*varPtrPtr=*/boxAddr,
- /*members=*/llvm::SmallVector<mlir::Value>{},
- /*member_index=*/mlir::ArrayAttr{},
- /*bounds=*/op.getBounds(),
- /*mapperId=*/mlir::FlatSymbolRefAttr(), /*name=*/op.getNameAttr(),
- builder.getBoolAttr(false));
-
- mlir::omp::MapInfoOp newMapInfoOp = mlir::omp::MapInfoOp::create(
- builder, op.getLoc(), op.getResult().getType(), varPtr,
- mlir::TypeAttr::get(
- llvm::cast<mlir::omp::PointerLikeType>(varPtr.getType())
- .getElementType()),
- op.getMapTypeAttr(), op.getMapCaptureTypeAttr(),
- /*varPtrPtr=*/mlir::Value{},
- /*members=*/llvm::SmallVector<mlir::Value>{memberMapInfoOp},
- /*member_index=*/newMembersAttr,
- /*bounds=*/llvm::SmallVector<mlir::Value>{},
- /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
- /*partial_map=*/builder.getBoolAttr(false));
- op.replaceAllUsesWith(newMapInfoOp.getResult());
- op->erase();
- return newMapInfoOp;
- }
-
// Expand mappings of type(C_PTR) to map their `__address` field explicitly
// as a single pointer-sized member (USM-gated at callsite). This helps in
// USM scenarios to ensure the pointer-sized mapping is used.
@@ -956,6 +904,14 @@ class MapInfoFinalizationPass
baseAddr.erase();
}
+ static bool hasADescriptor(mlir::Operation *varOp, mlir::Type varType) {
+ if (fir::isTypeWithDescriptor(varType) ||
+ mlir::isa<fir::BoxCharType>(varType) ||
+ mlir::isa_and_present<fir::BoxAddrOp>(varOp))
+ return true;
+ return false;
+ }
+
// This pass executes on omp::MapInfoOp's containing descriptor based types
// (allocatables, pointers, assumed shape etc.) and expanding them into
// multiple omp::MapInfoOp's for each pointer member contained within the
@@ -1209,36 +1165,6 @@ class MapInfoFinalizationPass
return mlir::WalkResult::advance();
});
- func->walk([&](mlir::omp::MapInfoOp op) {
- if (!op.getMembers().empty())
- return;
-
- if (!mlir::isa<fir::BoxCharType>(fir::unwrapRefType(op.getVarType())))
- return;
-
- // POSSIBLE_HACK_ALERT: If the boxchar has been implicitly mapped then
- // it is likely that the underlying pointer to the data
- // (!fir.ref<fir.char<k,?>>) has already been mapped. So, skip such
- // boxchars. We are primarily interested in boxchars that were mapped
- // by passes such as MapsForPrivatizedSymbols that map boxchars that
- // are privatized. At present, such boxchar maps are not marked
- // implicit. Should they be? I don't know. If they should be then
- // we need to change this check for early return OR live with
- // over-mapping.
- bool hasImplicitMap =
- (op.getMapType() & mlir::omp::ClauseMapFlags::implicit) ==
- mlir::omp::ClauseMapFlags::implicit;
- if (hasImplicitMap)
- return;
-
- assert(llvm::hasSingleElement(op->getUsers()) &&
- "OMPMapInfoFinalization currently only supports single users "
- "of a MapInfoOp");
-
- builder.setInsertionPoint(op);
- genBoxcharMemberMap(op, builder);
- });
-
// Expand type(C_PTR) only when unified_shared_memory is required,
// to ensure device-visible pointer size/behavior in USM scenarios
// without changing default expectations elsewhere.
@@ -1266,9 +1192,8 @@ class MapInfoFinalizationPass
"OMPMapInfoFinalization currently only supports single users "
"of a MapInfoOp");
- if (fir::isTypeWithDescriptor(op.getVarType()) ||
- mlir::isa_and_present<fir::BoxAddrOp>(
- op.getVarPtr().getDefiningOp())) {
+ if (hasADescriptor(op.getVarPtr().getDefiningOp(),
+ fir::unwrapRefType(op.getVarType()))) {
builder.setInsertionPoint(op);
mlir::Operation *targetUser = getFirstTargetUser(op);
assert(targetUser && "expected user of map operation was not found");
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index e1e6125fc348..8019c399f377 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -718,6 +718,31 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
return convertRecordType(recTy, fileAttr, scope, declOp);
} else if (auto tupleTy = mlir::dyn_cast_if_present<mlir::TupleType>(Ty)) {
return convertTupleType(tupleTy, fileAttr, scope, declOp);
+ } else if (mlir::isa<mlir::FunctionType>(Ty)) {
+ // Handle function types - these represent procedure pointers after the
+ // BoxedProcedure pass has run and unwrapped the fir.boxproc type, as well
+ // as dummy procedures (which are represented as function types in FIR)
+ llvm::SmallVector<mlir::LLVM::DITypeAttr> types;
+
+ auto funcTy = mlir::cast<mlir::FunctionType>(Ty);
+ // Add return type (or void if no return type)
+ if (funcTy.getNumResults() == 0)
+ types.push_back(mlir::LLVM::DINullTypeAttr::get(context));
+ else
+ types.push_back(
+ convertType(funcTy.getResult(0), fileAttr, scope, declOp));
+
+ for (mlir::Type paramTy : funcTy.getInputs())
+ types.push_back(convertType(paramTy, fileAttr, scope, declOp));
+
+ auto subroutineTy = mlir::LLVM::DISubroutineTypeAttr::get(
+ context, /*callingConvention=*/0, types);
+
+ return mlir::LLVM::DIDerivedTypeAttr::get(
+ context, llvm::dwarf::DW_TAG_pointer_type,
+ mlir::StringAttr::get(context, ""), subroutineTy,
+ /*sizeInBits=*/ptrSize * 8, /*alignInBits=*/0, /*offset=*/0,
+ /*optional<address space>=*/std::nullopt, /*extra data=*/nullptr);
} else if (auto refTy = mlir::dyn_cast_if_present<fir::ReferenceType>(Ty)) {
auto elTy = refTy.getEleTy();
return convertPointerLikeType(elTy, fileAttr, scope, declOp,
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index d7db15dd3794..91308281f2cb 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -5466,6 +5466,7 @@ CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity)
CHECK_SIMPLE_CLAUSE(AppendArgs, OMPC_append_args)
CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind)
CHECK_SIMPLE_CLAUSE(Capture, OMPC_capture)
+CHECK_SIMPLE_CLAUSE(Collector, OMPC_collector)
CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare)
CHECK_SIMPLE_CLAUSE(Contains, OMPC_contains)
CHECK_SIMPLE_CLAUSE(Default, OMPC_default)
@@ -5487,6 +5488,7 @@ CHECK_SIMPLE_CLAUSE(Holds, OMPC_holds)
CHECK_SIMPLE_CLAUSE(Inbranch, OMPC_inbranch)
CHECK_SIMPLE_CLAUSE(Inclusive, OMPC_inclusive)
CHECK_SIMPLE_CLAUSE(Indirect, OMPC_indirect)
+CHECK_SIMPLE_CLAUSE(Inductor, OMPC_inductor)
CHECK_SIMPLE_CLAUSE(Initializer, OMPC_initializer)
CHECK_SIMPLE_CLAUSE(Init, OMPC_init)
CHECK_SIMPLE_CLAUSE(Link, OMPC_link)
diff --git a/flang/test/Driver/convert.f90 b/flang/test/Driver/convert.f90
index 0ba31d2188cd..0b4da0282f3a 100755
--- a/flang/test/Driver/convert.f90
+++ b/flang/test/Driver/convert.f90
@@ -1,5 +1,5 @@
! Ensure argument -fconvert=<value> accepts all relevant options and produces an
-! error if an invalid value is specified.
+! error if an invalid value is specified.
!--------------------------
! FLANG DRIVER (flang)
diff --git a/flang/test/Driver/do_concurrent_to_omp_cli.f90 b/flang/test/Driver/do_concurrent_to_omp_cli.f90
index bdb603f35639..e44db04fb2ce 100644
--- a/flang/test/Driver/do_concurrent_to_omp_cli.f90
+++ b/flang/test/Driver/do_concurrent_to_omp_cli.f90
@@ -3,12 +3,12 @@
! RUN: %flang --help | FileCheck %s --check-prefix=FLANG
! FLANG: -fdo-concurrent-to-openmp=<value>
-! FLANG-NEXT: Try to map `do concurrent` loops to OpenMP [none|host|device]
+! FLANG-NEXT: Try to map `do concurrent` loops to OpenMP [none|host|device]
! RUN: bbc --help | FileCheck %s --check-prefix=BBC
! BBC: -fdo-concurrent-to-openmp=<string>
-! BBC-SAME: Try to map `do concurrent` loops to OpenMP [none|host|device]
+! BBC-SAME: Try to map `do concurrent` loops to OpenMP [none|host|device]
! RUN: %flang -c -fdo-concurrent-to-openmp=host %s 2>&1 \
! RUN: | FileCheck %s --check-prefix=OPT
diff --git a/flang/test/Driver/emit-mlir.f90 b/flang/test/Driver/emit-mlir.f90
index de5a62d6bc7f..f2a4b6cf7670 100644
--- a/flang/test/Driver/emit-mlir.f90
+++ b/flang/test/Driver/emit-mlir.f90
@@ -21,7 +21,7 @@
! CHECK-NEXT: func.func @main(%arg0: i32, %arg1: !llvm.ptr, %arg2: !llvm.ptr) -> i32 {
! CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
! CHECK-NEXT: %0 = fir.zero_bits !fir.ref<tuple<i32, !fir.ref<!fir.array<0xtuple<!fir.ref<i8>, !fir.ref<i8>>>>>>
-! CHECK-NEXT: fir.call @_FortranAProgramStart(%arg0, %arg1, %arg2, %0) {{.*}} : (i32, !llvm.ptr, !llvm.ptr, !fir.ref<tuple<i32, !fir.ref<!fir.array<0xtuple<!fir.ref<i8>, !fir.ref<i8>>>>>>)
+! CHECK-NEXT: fir.call @_FortranAProgramStart(%arg0, %arg1, %arg2, %0) {{.*}} : (i32, !llvm.ptr, !llvm.ptr, !fir.ref<tuple<i32, !fir.ref<!fir.array<0xtuple<!fir.ref<i8>, !fir.ref<i8>>>>>>)
! CHECK-NEXT: fir.call @_QQmain() fastmath<contract> : () -> ()
! CHECK-NEXT: fir.call @_FortranAProgramEndStatement() {{.*}} : () -> ()
! CHECK-NEXT: return %c0_i32 : i32
diff --git a/flang/test/Driver/fatal-errors-parsing.f90 b/flang/test/Driver/fatal-errors-parsing.f90
index 185a6e08481d..fd8e167a4807 100644
--- a/flang/test/Driver/fatal-errors-parsing.f90
+++ b/flang/test/Driver/fatal-errors-parsing.f90
@@ -7,7 +7,7 @@ program p
! CHECK2: fatal-errors-parsing.f90:{{.*}} error:
continue
end
-
+
subroutine s
contains
! CHECK1-NOT: error:
diff --git a/flang/test/Driver/fatal-errors-semantics.f90 b/flang/test/Driver/fatal-errors-semantics.f90
index 54740dd6deec..3d3f64225288 100644
--- a/flang/test/Driver/fatal-errors-semantics.f90
+++ b/flang/test/Driver/fatal-errors-semantics.f90
@@ -37,4 +37,3 @@ module m
call soa(null())
end
end
- \ No newline at end of file
diff --git a/flang/test/Driver/flang-ld-aarch64.f90 b/flang/test/Driver/flang-ld-aarch64.f90
index 61cd46cea5cd..4039859327a3 100644
--- a/flang/test/Driver/flang-ld-aarch64.f90
+++ b/flang/test/Driver/flang-ld-aarch64.f90
@@ -1,4 +1,4 @@
-! Check linker flags for AArch64 linux, since it needs both libgcc and
+! Check linker flags for AArch64 linux, since it needs both libgcc and
! compiler-rt, with compiler-rt second when -rtlib=libgcc.
! RUN: %flang -### -rtlib=libgcc --target=aarch64-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s
diff --git a/flang/test/Driver/flang-ld-powerpc.f90 b/flang/test/Driver/flang-ld-powerpc.f90
index 5328077ac21a..90586793a666 100644
--- a/flang/test/Driver/flang-ld-powerpc.f90
+++ b/flang/test/Driver/flang-ld-powerpc.f90
@@ -4,7 +4,7 @@
!! -static-libflang_rt in the future. Need to add that option here.
!! Because flang-rt currently only supports
-!! LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON, use
+!! LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON, use
!! resource_dir_with_per_target_subdir as inputs.
! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by default
@@ -26,7 +26,7 @@
! AIX64-LD-PER-TARGET-DEFAULT-NOT: "-L/[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix"
-! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by option
+! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by option
! RUN: %flang -static-libflangrt -Werror %s -### 2>&1 \
! RUN: --target=powerpc64-ibm-aix \
! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
@@ -44,7 +44,7 @@
! AIX64-LD-PER-TARGET-STATIC-SAME: "-lpthread"
-! Check powerpc64-ibm-aix 64-bit linking to shared flang-rt by option
+! Check powerpc64-ibm-aix 64-bit linking to shared flang-rt by option
! RUN: %flang -shared-libflangrt -Werror %s -### 2>&1 \
! RUN: --target=powerpc64-ibm-aix \
! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
diff --git a/flang/test/Driver/gcc-toolchain-install-dir.f90 b/flang/test/Driver/gcc-toolchain-install-dir.f90
index e195bdde6d2c..05b73bcc6a2e 100644
--- a/flang/test/Driver/gcc-toolchain-install-dir.f90
+++ b/flang/test/Driver/gcc-toolchain-install-dir.f90
@@ -5,10 +5,10 @@
! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-I386
! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-toolchain=%S/Inputs/basic_cross_linux_tree/usr | FileCheck %s --check-prefix=CHECK-I386
! CHECK-I386: Selected GCC installation: [[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0
-! CHECK-I386: "-fc1" "-triple" "i386-unknown-linux-gnu"
+! CHECK-I386: "-fc1" "-triple" "i386-unknown-linux-gnu"
! CHECK-I386: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}as"
! CHECK-I386: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_i386"
-! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0"
+! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0"
! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/lib"
! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=x86_64-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-X86-64
@@ -17,5 +17,5 @@
! CHECK-X86-64: "-fc1" "-triple" "x86_64-unknown-linux-gnu"
! CHECK-X86-64: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}as" "--64"
! CHECK-X86-64: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_x86_64"
-! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0"
+! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0"
! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/lib"
diff --git a/flang/test/Driver/large-data-threshold.f90 b/flang/test/Driver/large-data-threshold.f90
index 6a7eef79559d..fa2d4aef911e 100644
--- a/flang/test/Driver/large-data-threshold.f90
+++ b/flang/test/Driver/large-data-threshold.f90
@@ -5,8 +5,8 @@
! RUN: %flang -### -c --target=x86_64 -mlarge-data-threshold=32768 %s 2>&1 | FileCheck %s --check-prefix=NO-MCMODEL
! RUN: %flang -### -c --target=x86_64 -mcmodel=small -mlarge-data-threshold=32768 %s 2>&1 | FileCheck %s --check-prefix=NO-MCMODEL
! RUN: not %flang -### -c --target=aarch64 -mcmodel=small -mlarge-data-threshold=32768 %s 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
-
-
+
+
! CHECK: "{{.*}}flang" "-fc1"
! CHECK-SAME: "-mlarge-data-threshold=32768"
! CHECK-59000: "{{.*}}flang" "-fc1"
diff --git a/flang/test/Driver/lto-fatlto.f90 b/flang/test/Driver/lto-fatlto.f90
index c52d6e386ef0..2ea251eafacb 100644
--- a/flang/test/Driver/lto-fatlto.f90
+++ b/flang/test/Driver/lto-fatlto.f90
@@ -1,5 +1,5 @@
! REQUIRES: x86-registered-target
-! checks fatlto objects: that valid bitcode is included in the object file generated.
+! checks fatlto objects: that valid bitcode is included in the object file generated.
! RUN: %flang -fc1 -triple x86_64-unknown-linux-gnu -flto -ffat-lto-objects -emit-obj %s -o %t.o
! RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=ELF
diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index eb5165e36c91..0138d9b15274 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -100,7 +100,7 @@ end program
! ALL-NEXT: CSE
! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
-! ALL-NEXT: MIFOpConversion
+! ALL-NEXT: MIFOpConversion
! ALL-NEXT: BoxedProcedurePass
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
@@ -109,10 +109,10 @@ end program
! ALL-NEXT: 'func.func' Pipeline
! ALL-NEXT: AbstractResultOpt
! ALL-NEXT: 'gpu.module' Pipeline
-! ALL-NEXT: Pipeline Collection : ['func.func', 'gpu.func']
-! ALL-NEXT: 'func.func' Pipeline
+! ALL-NEXT: Pipeline Collection : ['func.func', 'gpu.func']
+! ALL-NEXT: 'func.func' Pipeline
! ALL-NEXT: AbstractResultOpt
-! ALL-NEXT: 'gpu.func' Pipeline
+! ALL-NEXT: 'gpu.func' Pipeline
! ALL-NEXT: AbstractResultOpt
! ALL-NEXT: 'omp.declare_reduction' Pipeline
! ALL-NEXT: AbstractResultOpt
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 3b6a9d7cda7e..0d68191fedc1 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -142,7 +142,7 @@ end program
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
! O2-NEXT: 'func.func' Pipeline
! O2-NEXT: SetRuntimeCallAttributes
-! ALL-NEXT: MIFOpConversion
+! ALL-NEXT: MIFOpConversion
! ALL-NEXT: BoxedProcedurePass
! O2-NEXT: AddAliasTags
@@ -152,10 +152,10 @@ end program
! ALL-NEXT: 'func.func' Pipeline
! ALL-NEXT: AbstractResultOpt
! ALL-NEXT: 'gpu.module' Pipeline
-! ALL-NEXT: Pipeline Collection : ['func.func', 'gpu.func']
-! ALL-NEXT: 'func.func' Pipeline
+! ALL-NEXT: Pipeline Collection : ['func.func', 'gpu.func']
+! ALL-NEXT: 'func.func' Pipeline
! ALL-NEXT: AbstractResultOpt
-! ALL-NEXT: 'gpu.func' Pipeline
+! ALL-NEXT: 'gpu.func' Pipeline
! ALL-NEXT: AbstractResultOpt
! ALL-NEXT: 'omp.declare_reduction' Pipeline
! ALL-NEXT: AbstractResultOpt
diff --git a/flang/test/Driver/multiple-actions-error.f95 b/flang/test/Driver/multiple-actions-error.f95
index 5ec4e9166657..3b2b7dc26d2c 100644
--- a/flang/test/Driver/multiple-actions-error.f95
+++ b/flang/test/Driver/multiple-actions-error.f95
@@ -1,8 +1,30 @@
-! Verify that the frontend driver error-out if multiple actions are specified
-
-! RUN: not %flang_fc1 -E -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix=ERROR
-! RUN: not %flang_fc1 -fsyntax-only -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix=ERROR
-
-! ERROR: error: Only one action option is allowed
-
-end progream
+! Verify that the frontend driver raises the expected error when multiple
+! actions are specified.
+!
+! RUN: not %flang_fc1 -fsyntax-only -fsyntax-only %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-1
+!
+! RUN: not %flang_fc1 -E -fsyntax-only %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-2
+!
+! RUN: not %flang_fc1 -fsyntax-only -E -emit-llvm %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-3
+!
+! If one or more options are specified with -Xflang, they will appear last in
+! the error message.
+!
+! RUN: not %flang -S -Xflang -emit-llvm %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-4
+!
+! RUN: not %flang -Xflang -emit-llvm -S %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-4
+!
+! RUN: not %flang -Xflang -emit-obj -S -Xflang -emit-llvm %s 2>&1 \
+! RUN: | FileCheck %s --check-prefixes=ERROR,ACTIONS-5
+!
+! ERROR: error: only one action option is allowed.
+! ACTIONS-1: Got '-fsyntax-only', '-fsyntax-only'
+! ACTIONS-2: Got '-E', '-fsyntax-only'
+! ACTIONS-3: Got '-fsyntax-only', '-E', '-emit-llvm'
+! ACTIONS-4: Got '-S', '-emit-llvm'
+! ACTIONS-5: Got '-S', '-emit-obj', '-emit-llvm'
diff --git a/flang/test/Driver/multiple-fc1-input.f90 b/flang/test/Driver/multiple-fc1-input.f90
index 57f7c5e92b4c..e142f358b6c1 100644
--- a/flang/test/Driver/multiple-fc1-input.f90
+++ b/flang/test/Driver/multiple-fc1-input.f90
@@ -5,5 +5,5 @@
! RUN: %flang_fc1 -emit-fir %s %s -o - | FileCheck %s
subroutine foo()
end subroutine
-! CHECK: func @_QPfoo()
-! CHECK: func @_QPfoo()
+! CHECK: func @_QPfoo()
+! CHECK: func @_QPfoo()
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 09248572b9ff..8660bec7e1ea 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -1,9 +1,9 @@
-! Test that flang OpenMP and OpenMP offload related
-! commands forward or expand to the appropriate commands
+! Test that flang OpenMP and OpenMP offload related
+! commands forward or expand to the appropriate commands
! for flang -fc1 as expected. Assumes a gfx90a, aarch64,
-! and sm_70 architecture, but doesn't require one to be
-! installed or compiled for, just testing the appropriate
-! generation of jobs are created with the correct
+! and sm_70 architecture, but doesn't require one to be
+! installed or compiled for, just testing the appropriate
+! generation of jobs are created with the correct
! corresponding arguments.
! Test regular -fopenmp with no offload
@@ -47,7 +47,7 @@
! OFFLOAD-DEVICE-NEXT: "{{[^"]*}}flang" "-fc1" "-triple" "nvptx64-nvidia-cuda"
! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
-! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag addition and correct fopenmp
+! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag addition and correct fopenmp
! RUN: %flang -### -fopenmp --offload-arch=gfx90a -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck --check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
@@ -169,7 +169,7 @@
! RUN: | FileCheck %s --check-prefixes=CHECK-OPENMP-VERSION
! CHECK-OPENMP-VERSION: "{{[^"]*}}flang" "-fc1" {{.*}} "-fopenmp" "-fopenmp-version=45" {{.*}}.f90"
-! Test diagnostic error when host IR file is non-existent
+! Test diagnostic error when host IR file is non-existent
! RUN: not %flang_fc1 %s -o %t 2>&1 -fopenmp -fopenmp-is-target-device \
! RUN: -fopenmp-host-ir-file-path non-existant-file.bc \
! RUN: | FileCheck %s --check-prefix=HOST-IR-MISSING
diff --git a/flang/test/Driver/tune-cpu-fir.f90 b/flang/test/Driver/tune-cpu-fir.f90
index 43c13b426d5d..843feebfa12c 100644
--- a/flang/test/Driver/tune-cpu-fir.f90
+++ b/flang/test/Driver/tune-cpu-fir.f90
@@ -14,7 +14,7 @@
! ARMTUNE-SAME: fir.tune_cpu = "neoverse-n1"
! ARMBOTH-SAME: fir.target_cpu = "aarch64"
-! ARMBOTH-SAME: fir.tune_cpu = "neoverse-n1"
+! ARMBOTH-SAME: fir.tune_cpu = "neoverse-n1"
! X86CPU-SAME: fir.target_cpu = "x86-64"
! X86CPU-NOT: fir.tune_cpu = "pentium4"
diff --git a/flang/test/Driver/version-loops.f90 b/flang/test/Driver/version-loops.f90
index d206393a04f4..c4caf4688ab4 100644
--- a/flang/test/Driver/version-loops.f90
+++ b/flang/test/Driver/version-loops.f90
@@ -1,22 +1,22 @@
-! Test that flang forwards the -f{no-,}version-loops-for-stride
+! Test that flang forwards the -f{no-,}version-loops-for-stride
! options correctly to flang -fc1 for different variants of optimisation
! and explicit flags.
! RUN: %flang -### %s -o %t 2>&1 -O3 \
! RUN: | FileCheck %s
-
+
! RUN: %flang -### %s -o %t 2>&1 -O2 \
! RUN: | FileCheck %s --check-prefix=CHECK-O2
! RUN: %flang -### %s -o %t 2>&1 -O2 -fversion-loops-for-stride \
! RUN: | FileCheck %s --check-prefix=CHECK-O2-with
-
+
! RUN: %flang -### %s -o %t 2>&1 -O4 \
! RUN: | FileCheck %s --check-prefix=CHECK-O4
-
+
! RUN: %flang -### %s -o %t 2>&1 -Ofast \
! RUN: | FileCheck %s --check-prefix=CHECK-Ofast
-
+
! RUN: %flang -### %s -o %t 2>&1 -Ofast -fno-version-loops-for-stride \
! RUN: | FileCheck %s --check-prefix=CHECK-Ofast-no
@@ -29,12 +29,12 @@
! CHECK-O2: "{{.*}}flang" "-fc1"
! CHECK-O2-NOT: "-fversion-loops-for-stride"
-! CHECK-O2-SAME: "-O2"
+! CHECK-O2-SAME: "-O2"
! CHECK-O2-with: "{{.*}}flang" "-fc1"
! CHECK-O2-with-SAME: "-fversion-loops-for-stride"
-! CHECK-O2-with-SAME: "-O2"
-
+! CHECK-O2-with-SAME: "-O2"
+
! CHECK-O4: "{{.*}}flang" "-fc1"
! CHECK-O4-SAME: "-fversion-loops-for-stride"
! CHECK-O4-SAME: "-O3"
diff --git a/flang/test/Evaluate/folding33.f90 b/flang/test/Evaluate/folding33.f90
index fb5a23cf1f20..299cb7e1731a 100644
--- a/flang/test/Evaluate/folding33.f90
+++ b/flang/test/Evaluate/folding33.f90
@@ -1,4 +1,4 @@
!RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s
-!CHECK: warning: overflow on REAL(4) to REAL(2) conversion after folding a call to 'exp' [-Wfolding-exception]
+!CHECK: warning: overflow on compilation-time evaluation of a call to 'exp' [-Wfolding-exception]
print *, exp((11.265625_2,1._2))
end
diff --git a/flang/test/Integration/debug-proc-ptr-e2e.f90 b/flang/test/Integration/debug-proc-ptr-e2e.f90
new file mode 100644
index 000000000000..aa89160b7c8f
--- /dev/null
+++ b/flang/test/Integration/debug-proc-ptr-e2e.f90
@@ -0,0 +1,26 @@
+! RUN: %flang_fc1 -emit-llvm -debug-info-kind=standalone %s -o - | FileCheck %s
+
+program test_proc_ptr
+ implicit none
+ procedure(fun1), pointer :: fun_ptr
+
+ fun_ptr => fun1
+ print *, fun_ptr(3)
+
+contains
+ integer function fun1(x)
+ integer :: x
+ fun1 = x + 1
+ end function fun1
+end program test_proc_ptr
+
+! Check that fun_ptr is declared with correct type
+! CHECK-DAG: ![[INT:.*]] = !DIBasicType(name: "integer", size: 32, encoding: DW_ATE_signed)
+! CHECK-DAG: ![[PTR_INT:.*]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[INT]], size: 64)
+
+! Check that fun_ptr variable is a pointer to a subroutine type
+! The order is: DILocalVariable -> pointer type -> subroutine type -> {return, params}
+! CHECK-DAG: ![[FUN_PTR_VAR:.*]] = !DILocalVariable(name: "fun_ptr", {{.*}}type: ![[PROC_PTR:[0-9]+]]
+! CHECK-DAG: ![[PROC_PTR]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[SUBR_TYPE:[0-9]+]], size: 64)
+! CHECK-DAG: ![[SUBR_TYPE]] = !DISubroutineType(types: ![[SUBR_TYPES:[0-9]+]])
+! CHECK-DAG: ![[SUBR_TYPES]] = !{![[INT]], ![[PTR_INT]]}
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 9f8f74a0c7b5..3a255afd5926 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -22,9 +22,6 @@ attributes(global) subroutine devsub()
call syncthreads()
call syncwarp(1)
- call threadfence()
- call threadfence_block()
- call threadfence_system()
ret = syncthreads_and(1)
res = syncthreads_and(tid > offset)
ret = syncthreads_count(1)
@@ -106,9 +103,6 @@ end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: nvvm.barrier0
! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
-! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
-! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
-! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
diff --git a/flang/test/Lower/CUDA/cuda-synchronization.cuf b/flang/test/Lower/CUDA/cuda-synchronization.cuf
new file mode 100644
index 000000000000..6e2e23423c36
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-synchronization.cuf
@@ -0,0 +1,14 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Test CUDA Fortran instrinsics lowerings for synchronization.
+
+attributes(global) subroutine sync()
+ call threadfence()
+ call threadfence_block()
+ call threadfence_system()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsync() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: nvvm.memory.barrier <gpu>
+! CHECK: nvvm.memory.barrier <cta>
+! CHECK: nvvm.memory.barrier <sys>
diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90
index c42c7dddc5ca..829ed5486c19 100644
--- a/flang/test/Lower/OpenACC/acc-unstructured.f90
+++ b/flang/test/Lower/OpenACC/acc-unstructured.f90
@@ -1,5 +1,4 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
-! XFAIL: *
subroutine test_unstructured1(a, b, c)
integer :: i, j, k
@@ -55,10 +54,11 @@ subroutine test_unstructured2(a, b, c)
! CHECK-LABEL: func.func @_QPtest_unstructured2
! CHECK: acc.parallel
-! CHECK: acc.loop
+! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: fir.call @_FortranAStopStatementText
! CHECK: acc.yield
! CHECK: acc.yield
+! CHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
! CHECK: acc.yield
end subroutine
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
index f3b939780c2b..a6394ea19699 100644
--- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
@@ -156,7 +156,7 @@ end subroutine target_allocatable
! CHECK-SAME: %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]]
! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]]
! CHECK-SAME: %[[MAPPED_MI0]] -> %[[MAPPED_ARG0:[^,]+]]
-! CHECK-SAME: !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<i32>, !fir.llvm_ptr<!fir.ref<i32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>
+! CHECK-SAME: !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<i32>, !fir.llvm_ptr<!fir.ref<i32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
! CHECK-SAME: private(
! CHECK-SAME: @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=0],
! CHECK-SAME: @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]],
diff --git a/flang/test/Lower/OpenMP/map-character.f90 b/flang/test/Lower/OpenMP/map-character.f90
index cefd3ac0e54f..9a114238fa9e 100644
--- a/flang/test/Lower/OpenMP/map-character.f90
+++ b/flang/test/Lower/OpenMP/map-character.f90
@@ -39,8 +39,11 @@ end subroutine TestOfCharacter
!CHECK: %[[A1_UB:.*]] = arith.subi %[[UNBOXED_ARG1]]#1, %[[CONST_ONE]] : index
!CHECK: %[[BOUNDS_A1_BOXCHAR:.*]] = omp.map.bounds lower_bound(%[[CONST_ZERO]] : index) upper_bound(%[[A1_UB]] : index) extent(%[[UNBOXED_ARG1]]#1 : index)
!CHECK-SAME: stride(%[[CONST_ONE]] : index) start_idx(%[[CONST_ZERO]] : index) {stride_in_bytes = true}
-!CHECK: %[[A1_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A1_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to)
-!CHECK-SAME: capture(ByRef) bounds(%[[BOUNDS_A1_BOXCHAR]]) -> !fir.ref<!fir.boxchar<1>> {name = ""}
+!CHECK: %[[A1_BOX_ADDR:.*]] = fir.box_offset %[[A1_BOXCHAR_ALLOCA]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+!CHECK: %[[A1_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A1_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(implicit, to)
+!CHECK-SAME: capture(ByRef) var_ptr_ptr(%[[A1_BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%[[BOUNDS_A1_BOXCHAR]]) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>> {name = ""}
+!CHECK: %[[A1_BOXCHAR_MAP_2:.*]] = omp.map.info var_ptr(%[[A1_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>)
+!CHECK-SAME: map_clauses(implicit, to) capture(ByRef) members(%[[A1_BOXCHAR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) -> !fir.ref<!fir.boxchar<1>> {name = ""}
!CHECK: fir.store %[[ARG0]] to %[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>
!CHECK: %[[CONST_ZERO:.*]] = arith.constant 0 : index
!CHECK: %[[CONST_ONE:.*]] = arith.constant 1 : index
@@ -48,9 +51,12 @@ end subroutine TestOfCharacter
!CHECK: %[[A0_UB:.*]] = arith.subi %[[UNBOXED_ARG0]]#1, %[[CONST_ONE]] : index
!CHECK: %[[BOUNDS_A0_BOXCHAR:.*]] = omp.map.bounds lower_bound(%[[CONST_ZERO]] : index) upper_bound(%[[A0_UB]] : index) extent(%[[UNBOXED_ARG0]]#1 : index)
!CHECK-SAME: stride(%[[CONST_ONE]] : index) start_idx(%[[CONST_ZERO]] : index) {stride_in_bytes = true}
-!CHECK: %[[A0_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to)
-!CHECK-SAME: capture(ByRef) bounds(%[[BOUNDS_A0_BOXCHAR]]) -> !fir.ref<!fir.boxchar<1>> {name = ""}
-!CHECK: omp.target map_entries(%[[A0_MAP]] -> %[[TGT_A0:.*]], %[[A1_MAP]] -> %[[TGT_A1:.*]], %[[A1_BOXCHAR_MAP]] -> %[[TGT_A1_BOXCHAR:.*]], %[[A0_BOXCHAR_MAP]] -> %[[TGT_A0_BOXCHAR:.*]] : !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<!fir.boxchar<1>>) {
+!CHECK: %[[A0_BOX_ADDR:.*]] = fir.box_offset %[[A0_BOXCHAR_ALLOCA]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+!CHECK: %[[A0_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(implicit, to)
+!CHECK-SAME: capture(ByRef) var_ptr_ptr(%[[A0_BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%24) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>> {name = ""}
+!CHECK: %[[A0_BOXCHAR_MAP_2:.*]] = omp.map.info var_ptr(%[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to)
+!CHECK-SAME: capture(ByRef) members(%[[A0_BOXCHAR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) -> !fir.ref<!fir.boxchar<1>> {name = ""}
+!CHECK: omp.target map_entries(%[[A0_MAP]] -> %[[TGT_A0:.*]], %[[A1_MAP]] -> %[[TGT_A1:.*]], %[[A1_BOXCHAR_MAP_2]] -> %[[TGT_A1_BOXCHAR:.*]], %[[A0_BOXCHAR_MAP_2]] -> %[[TGT_A0_BOXCHAR:.*]], %[[A1_BOXCHAR_MAP]] -> %[[TGT_A1_BOXCHAR2:.*]], %[[A0_BOXCHAR_MAP]] -> %[[TGT_A0_BOXCHAR2:.*]] : !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) {
!CHECK: %[[TGT_A0_BC_LD:.*]] = fir.load %[[TGT_A0_BOXCHAR]] : !fir.ref<!fir.boxchar<1>>
!CHECK: %[[TGT_A1_BC_LD:.*]] = fir.load %[[TGT_A1_BOXCHAR]] : !fir.ref<!fir.boxchar<1>>
!CHECK: %[[UNBOXED_TGT_A1:.*]]:2 = fir.unboxchar %[[TGT_A1_BC_LD]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
diff --git a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 b/flang/test/Lower/OpenMP/optional-argument-map-2.f90
index 791d509028de..a787ad78dfaf 100644
--- a/flang/test/Lower/OpenMP/optional-argument-map-2.f90
+++ b/flang/test/Lower/OpenMP/optional-argument-map-2.f90
@@ -71,11 +71,10 @@ end module mod
! CHECK-FPRIV: %[[VAL_12:.*]]:2 = fir.unboxchar %[[VAL_8]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
! CHECK-FPRIV: %[[VAL_13:.*]] = arith.subi %[[VAL_12]]#1, %[[VAL_11]] : index
! CHECK-FPRIV: %[[VAL_14:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_13]] : index) extent(%[[VAL_12]]#1 : index) stride(%[[VAL_11]] : index) start_idx(%[[VAL_10]] : index) {stride_in_bytes = true}
-! CHECK-FPRIV: %[[VAL_15:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.boxchar<1>>
! CHECK-FPRIV: %[[VAL_16:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
-! CHECK-FPRIV: %[[VAL_17:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[VAL_16]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%[[VAL_14]]) -> !fir.ref<!fir.boxchar<1>>
-! CHECK-FPRIV: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%[[VAL_17]] : [0] : !fir.ref<!fir.boxchar<1>>) -> !fir.ref<!fir.boxchar<1>>
-! CHECK-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_19:.*]], %[[VAL_18]] -> %[[VAL_20:.*]], %[[VAL_17]] -> %[[VAL_21:.*]] : !fir.ref<!fir.char<1,4>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<!fir.boxchar<1>>) private(@_QMmodFroutine_boxcharEa_firstprivate_boxchar_c8xU %[[VAL_3]]#0 -> %[[VAL_22:.*]] [map_idx=1] : !fir.boxchar<1>) {
+! CHECK-FPRIV: %[[VAL_17:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[VAL_16]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%[[VAL_14]]) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>> {name = ""}
+! CHECK-FPRIV: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%[[VAL_17]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) -> !fir.ref<!fir.boxchar<1>>
+! CHECK-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_19:.*]], %[[VAL_18]] -> %[[VAL_20:.*]], %[[VAL_17]] -> %[[VAL_21:.*]] : !fir.ref<!fir.char<1,4>>, !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) private(@_QMmodFroutine_boxcharEa_firstprivate_boxchar_c8xU %[[VAL_3]]#0 -> %[[VAL_22:.*]] [map_idx=1] : !fir.boxchar<1>) {
! CHECK-FPRIV: %[[VAL_23:.*]] = arith.constant 4 : index
! CHECK-FPRIV: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] typeparams %[[VAL_23]] {uniq_name = "_QMmodFroutine_boxcharEb"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
! CHECK-FPRIV: %[[VAL_25:.*]]:2 = fir.unboxchar %[[VAL_22]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
@@ -103,14 +102,16 @@ end module mod
! CHECK-NO-FPRIV: %[[VAL_19:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
! CHECK-NO-FPRIV: %[[VAL_20:.*]] = arith.subi %[[VAL_19]]#1, %[[VAL_18]] : index
! CHECK-NO-FPRIV: %[[VAL_21:.*]] = omp.map.bounds lower_bound(%[[VAL_17]] : index) upper_bound(%[[VAL_20]] : index) extent(%[[VAL_19]]#1 : index) stride(%[[VAL_18]] : index) start_idx(%[[VAL_17]] : index) {stride_in_bytes = true}
-! CHECK-NO-FPRIV: %[[VAL_22:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to) capture(ByRef) bounds(%[[VAL_21]]) -> !fir.ref<!fir.boxchar<1>> {name = ""}
-! CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_23:.*]], %[[VAL_16]] -> %[[VAL_24:.*]], %[[VAL_22]] -> %[[VAL_25:.*]] : !fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.boxchar<1>>) {
-! CHECK-NO-FPRIV: %[[VAL_26:.*]] = fir.load %[[VAL_25]] : !fir.ref<!fir.boxchar<1>>
-! CHECK-NO-FPRIV: %[[VAL_27:.*]]:2 = fir.unboxchar %[[VAL_26]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
-! CHECK-NO-FPRIV: %[[VAL_28:.*]] = arith.constant 4 : index
-! CHECK-NO-FPRIV: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_23]] typeparams %[[VAL_28]] {uniq_name = "_QMmodFroutine_boxcharEb"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
-! CHECK-NO-FPRIV: %[[VAL_30:.*]]:2 = hlfir.declare %[[VAL_24]] typeparams %[[VAL_27]]#1 {fortran_attrs = #fir.var_attrs<intent_in, optional>, uniq_name = "_QMmodFroutine_boxcharEa"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
-! CHECK-NO-FPRIV: hlfir.assign %[[VAL_30]]#0 to %[[VAL_29]]#0 : !fir.boxchar<1>, !fir.ref<!fir.char<1,4>>
+! CHECK-NO-FPRIV: %[[VAL_22:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+! CHECK-NO-FPRIV: %[[VAL_23:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[VAL_22]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%14) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>> {name = ""}
+! CHECK-NO-FPRIV: %[[VAL_24:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_23]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) -> !fir.ref<!fir.boxchar<1>> {name = ""}
+! CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_25:.*]], %[[VAL_16]] -> %[[VAL_26:.*]], %[[VAL_24]] -> %[[VAL_27:.*]], %[[VAL_23]] -> %[[VAL_28:.*]] : !fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) {
+! CHECK-NO-FPRIV: %[[VAL_29:.*]] = fir.load %[[VAL_27]] : !fir.ref<!fir.boxchar<1>>
+! CHECK-NO-FPRIV: %[[VAL_30:.*]]:2 = fir.unboxchar %[[VAL_29]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK-NO-FPRIV: %[[VAL_31:.*]] = arith.constant 4 : index
+! CHECK-NO-FPRIV: %[[VAL_32:.*]]:2 = hlfir.declare %[[VAL_25]] typeparams %[[VAL_31]] {uniq_name = "_QMmodFroutine_boxcharEb"} : (!fir.ref<!fir.char<1,4>>, index) -> (!fir.ref<!fir.char<1,4>>, !fir.ref<!fir.char<1,4>>)
+! CHECK-NO-FPRIV: %[[VAL_33:.*]]:2 = hlfir.declare %[[VAL_26]] typeparams %[[VAL_30]]#1 {fortran_attrs = #fir.var_attrs<intent_in, optional>, uniq_name = "_QMmodFroutine_boxcharEa"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+! CHECK-NO-FPRIV: hlfir.assign %[[VAL_33]]#0 to %[[VAL_32]]#0 : !fir.boxchar<1>, !fir.ref<!fir.char<1,4>>
! CHECK-NO-FPRIV: omp.terminator
! CHECK-NO-FPRIV: }
! CHECK-NO-FPRIV: return
diff --git a/flang/test/Semantics/structconst12.f90 b/flang/test/Semantics/structconst12.f90
new file mode 100644
index 000000000000..345016b236c8
--- /dev/null
+++ b/flang/test/Semantics/structconst12.f90
@@ -0,0 +1,12 @@
+!RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+!CHECK: TYPE(t) :: x = t(pp=f)
+!CHECK-NOT: error:
+interface
+ function f()
+ end
+end interface
+type t
+ procedure(f), nopass, pointer :: pp
+end type
+type(t) :: x = t(pp=f)
+end
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-copy-reduction.fir b/flang/test/Transforms/OpenACC/acc-implicit-copy-reduction.fir
new file mode 100644
index 000000000000..d0fc5b7a2ee0
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-implicit-copy-reduction.fir
@@ -0,0 +1,134 @@
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data{enable-implicit-reduction-copy=true})" -split-input-file | FileCheck %s --check-prefix=COPY
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data{enable-implicit-reduction-copy=false})" -split-input-file | FileCheck %s --check-prefix=FIRSTPRIVATE
+
+// Test case: integer reduction in parallel loop
+// This corresponds to Fortran code:
+// integer :: r, i
+// r = 0
+// !$acc parallel
+// !$acc loop gang reduction(+:r)
+// do i = 1, N
+// r = r + 1
+// enddo
+// !$acc end parallel
+
+acc.reduction.recipe @reduction_add_ref_i32 : !fir.ref<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.ref<i32>):
+ %c0_i32 = arith.constant 0 : i32
+ %0 = fir.alloca i32
+ %1 = fir.declare %0 {uniq_name = "acc.reduction.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ fir.store %c0_i32 to %1 : !fir.ref<i32>
+ acc.yield %1 : !fir.ref<i32>
+} combiner {
+^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+ %0 = fir.load %arg0 : !fir.ref<i32>
+ %1 = fir.load %arg1 : !fir.ref<i32>
+ %2 = arith.addi %0, %1 : i32
+ fir.store %2 to %arg0 : !fir.ref<i32>
+ acc.yield %arg0 : !fir.ref<i32>
+}
+
+func.func @test_reduction_implicit_copy() {
+ %c1_i32 = arith.constant 1 : i32
+ %cN = arith.constant 100 : i32
+ %r = fir.alloca i32 {bindc_name = "r", uniq_name = "_QFEr"}
+ %i = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+ %r_decl = fir.declare %r {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %i_decl = fir.declare %i {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %c0_i32 = arith.constant 0 : i32
+ fir.store %c0_i32 to %r_decl : !fir.ref<i32>
+
+ acc.parallel {
+ %red_var = acc.reduction varPtr(%r_decl : !fir.ref<i32>) -> !fir.ref<i32> {name = "r"}
+ acc.loop reduction(@reduction_add_ref_i32 -> %red_var : !fir.ref<i32>) control(%iv : i32) = (%c1_i32 : i32) to (%cN : i32) step (%c1_i32 : i32) {
+ fir.store %iv to %i_decl : !fir.ref<i32>
+ %cur_r = fir.load %red_var : !fir.ref<i32>
+ %new_r = arith.addi %cur_r, %c1_i32 : i32
+ fir.store %new_r to %red_var : !fir.ref<i32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable
+// COPY: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "r"}
+// COPY: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<i32>) to varPtr({{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "r"}
+
+// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable
+// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "r"}
+// FIRSTPRIVATE-NOT: acc.copyin
+// FIRSTPRIVATE-NOT: acc.copyout
+
+// -----
+
+// Test case: reduction variable used both in loop and outside (should be firstprivate)
+// This corresponds to Fortran code:
+// integer :: r = 0, i, out
+// !$acc parallel num_gangs(1)
+// !$acc loop reduction(+:r) copyout(out)
+// do i = 1, N
+// r = r + 1
+// enddo
+// out = r
+// !$acc end parallel
+
+acc.reduction.recipe @reduction_add_ref_i32 : !fir.ref<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.ref<i32>):
+ %c0_i32 = arith.constant 0 : i32
+ %0 = fir.alloca i32
+ %1 = fir.declare %0 {uniq_name = "acc.reduction.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ fir.store %c0_i32 to %1 : !fir.ref<i32>
+ acc.yield %1 : !fir.ref<i32>
+} combiner {
+^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+ %0 = fir.load %arg0 : !fir.ref<i32>
+ %1 = fir.load %arg1 : !fir.ref<i32>
+ %2 = arith.addi %0, %1 : i32
+ fir.store %2 to %arg0 : !fir.ref<i32>
+ acc.yield %arg0 : !fir.ref<i32>
+}
+
+func.func @test_reduction_with_usage_outside_loop() {
+ %c1_i32 = arith.constant 1 : i32
+ %cN = arith.constant 100 : i32
+ %c0_i32 = arith.constant 0 : i32
+
+ %r = fir.alloca i32 {bindc_name = "r", uniq_name = "_QFEr"}
+ %i = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+ %out = fir.alloca i32 {bindc_name = "out", uniq_name = "_QFEout"}
+
+ %r_decl = fir.declare %r {uniq_name = "_QFEr"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %i_decl = fir.declare %i {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %out_decl = fir.declare %out {uniq_name = "_QFEout"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ fir.store %c0_i32 to %r_decl : !fir.ref<i32>
+
+ %out_copyout = acc.create varPtr(%out_decl : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_copyout>, name = "out"}
+ acc.parallel dataOperands(%out_copyout : !fir.ref<i32>) {
+ %red_var = acc.reduction varPtr(%r_decl : !fir.ref<i32>) -> !fir.ref<i32> {name = "r"}
+ acc.loop reduction(@reduction_add_ref_i32 -> %red_var : !fir.ref<i32>) control(%iv : i32) = (%c1_i32 : i32) to (%cN : i32) step (%c1_i32 : i32) {
+ fir.store %iv to %i_decl : !fir.ref<i32>
+ %cur_r = fir.load %red_var : !fir.ref<i32>
+ %new_r = arith.addi %cur_r, %c1_i32 : i32
+ fir.store %new_r to %red_var : !fir.ref<i32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ // out = r (usage of r outside the loop)
+ %final_r = fir.load %r_decl : !fir.ref<i32>
+ fir.store %final_r to %out_copyout : !fir.ref<i32>
+ acc.yield
+ }
+ acc.copyout accPtr(%out_copyout : !fir.ref<i32>) to varPtr(%out_decl : !fir.ref<i32>) {dataClause = #acc<data_clause acc_copyout>, name = "out"}
+ return
+}
+
+// In this case, r should be firstprivate regardless of the flag setting because it's used outside the reduction context
+// COPY-LABEL: func.func @test_reduction_with_usage_outside_loop
+// COPY: acc.firstprivate varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "r"}
+// COPY-NOT: acc.copyin varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {{.*}} name = "r"
+
+// FIRSTPRIVATE-LABEL: func.func @test_reduction_with_usage_outside_loop
+// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "r"}
+// FIRSTPRIVATE-NOT: acc.copyin varPtr({{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {{.*}} name = "r"
+
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data-derived-type-member.F90 b/flang/test/Transforms/OpenACC/acc-implicit-data-derived-type-member.F90
new file mode 100644
index 000000000000..71e7d79b7260
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data-derived-type-member.F90
@@ -0,0 +1,38 @@
+!RUN: rm -rf %t && mkdir %t && cd %t && \
+!RUN: bbc %s -fopenacc -emit-hlfir -o - \
+!RUN: | fir-opt --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" \
+!RUN: | FileCheck %s
+
+! This test exercises whether the ACCImplicitData pass inserts its new
+! data operations in appropriate position so that parents are copied in before
+! their children.
+
+module types
+ type derivc8r4
+ complex(8) :: member0
+ real(4) :: member1
+ end type derivc8r4
+end module
+program test
+ use types
+ implicit none
+ type (derivc8r4) :: d2
+ type (derivc8r4) :: d4
+ integer(4) :: i0
+ d2%member0 = 123
+ !$acc serial copyin(d2%member0) copyout(d4%member0)
+ do i0 = 1, 1
+ d4%member0 = d2%member0
+ end do
+ !$acc end serial
+end program
+
+!CHECK: acc.copyin {{.*}} {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "d2"}
+!CHECK: acc.copyin {{.*}} {name = "d2%member0"}
+!CHECK: acc.copyin {{.*}} {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "d4"}
+!CHECK: acc.create {{.*}} {dataClause = #acc<data_clause acc_copyout>, name = "d4%member0"}
+!CHECK: acc.delete {{.*}} {dataClause = #acc<data_clause acc_copyin>, name = "d2%member0"}
+!CHECK: acc.copyout {{.*}} {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "d2"}
+!CHECK: acc.copyout {{.*}} {name = "d4%member0"}
+!CHECK: acc.copyout {{.*}} {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "d4"}
+
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data-fortran.F90 b/flang/test/Transforms/OpenACC/acc-implicit-data-fortran.F90
new file mode 100644
index 000000000000..228aba1b1164
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data-fortran.F90
@@ -0,0 +1,79 @@
+!RUN: rm -rf %t && mkdir %t && cd %t && \
+!RUN: bbc %s -fopenacc -emit-hlfir -o - \
+!RUN: | fir-opt --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" \
+!RUN: | FileCheck %s --check-prefix=CHECKHLFIR
+
+!RUN: rm -rf %t && mkdir %t && cd %t && \
+!RUN: bbc %s -fopenacc -emit-hlfir -o - \
+!RUN: | fir-opt --pass-pipeline="builtin.module(cse,acc-initialize-fir-analyses,acc-implicit-data)" \
+!RUN: | FileCheck %s --check-prefix=CHECKCSE
+
+!RUN: rm -rf %t && mkdir %t && cd %t && \
+!RUN: bbc %s -fopenacc -emit-fir -o - \
+!RUN: | fir-opt --pass-pipeline="builtin.module(cse,acc-initialize-fir-analyses,acc-implicit-data)" \
+!RUN: | FileCheck %s --check-prefix=CHECKCSE
+
+! This test uses bbc to generate both HLFIR and FIR for this test. The intent is
+! that it is exercising the acc implicit data pipeline and ensures that
+! correct clauses are generated. It also runs CSE which eliminates redundant
+! interior pointer computations (and thus different live-ins are found).
+
+program main
+ type aggr
+ real :: field
+ end type
+ type nested
+ type(aggr) :: outer
+ end type
+ type(aggr) :: aggrvar
+ type(nested) :: nestaggrvar
+ real :: scalarvar
+ real :: arrayvar(10)
+ complex :: scalarcomp
+
+ aggrvar%field = 1
+ scalarvar = aggrvar%field
+ nestaggrvar%outer%field = scalarvar
+ scalarcomp = scalarvar
+ arrayvar = real(scalarcomp)
+ arrayvar(2) = aggrvar%field
+
+ !$acc kernels
+ arrayvar = aggrvar%field + scalarvar + nestaggrvar%outer%field + real(scalarcomp) + arrayvar(2)
+ !$acc end kernels
+
+ !$acc parallel
+ arrayvar = aggrvar%field + scalarvar + nestaggrvar%outer%field + real(scalarcomp) + arrayvar(2)
+ !$acc end parallel
+end program
+
+!CHECKHLFIR-LABEL: @_QQmain
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.type<_QFTnested{outer:!fir.type<_QFTaggr{field:f32}>}>>) -> !fir.ref<!fir.type<_QFTnested{outer:!fir.type<_QFTaggr{field:f32}>}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "nestaggrvar"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarcomp"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+!CHECKHLFIR: acc.kernels
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+!CHECKHLFIR-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.type<_QFTnested{outer:!fir.type<_QFTaggr{field:f32}>}>>) -> !fir.ref<!fir.type<_QFTnested{outer:!fir.type<_QFTaggr{field:f32}>}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "nestaggrvar"}
+!CHECKHLFIR-DAG: acc.firstprivate varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {implicit = true, name = "scalarcomp"}
+!CHECKHLFIR-DAG: acc.firstprivate varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
+!CHECKHLFIR: acc.parallel
+
+!CHECKCSE-LABEL: @_QQmain
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarcomp"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar%field"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "nestaggrvar%outer%field"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar(2)"}
+!CHECKCSE: acc.kernels
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+!CHECKCSE-DAG: acc.firstprivate varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {implicit = true, name = "scalarcomp"}
+!CHECKCSE-DAG: acc.firstprivate varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar%field"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "nestaggrvar%outer%field"}
+!CHECKCSE-DAG: acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar(2)"}
+!CHECKCSE: acc.parallel
+
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
new file mode 100644
index 000000000000..7f6a57cb4d8c
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir
@@ -0,0 +1,358 @@
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" -split-input-file | FileCheck %s
+
+// -----
+
+func.func @test_fir_scalar_in_serial() {
+ %livein = fir.alloca i64 {bindc_name = "scalarvar"}
+ acc.serial {
+ %load = fir.load %livein : !fir.ref<i64>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "scalarvar"}
+
+// -----
+
+func.func @test_fir_scalar_in_parallel() {
+ %livein = fir.alloca f32 {bindc_name = "scalarvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<f32>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
+
+// -----
+
+func.func @test_fir_scalar_in_kernels() {
+ %livein = fir.alloca f64 {bindc_name = "scalarvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<f64>
+ acc.terminator
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<f64>) to varPtr({{.*}} : !fir.ref<f64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+
+// -----
+
+func.func @test_fir_scalar_in_parallel_defaultnone() {
+ %livein = fir.alloca f32 {bindc_name = "scalarvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<f32>
+ acc.yield
+ } attributes {defaultAttr = #acc<defaultvalue none>}
+ return
+}
+
+// CHECK-NOT: acc.firstprivate
+
+// -----
+
+func.func @test_fir_scalar_in_kernels_defaultnone() {
+ %livein = fir.alloca f64 {bindc_name = "scalarvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<f64>
+ acc.terminator
+ } attributes {defaultAttr = #acc<defaultvalue none>}
+ return
+}
+
+// CHECK-NOT: acc.copyin
+
+// -----
+
+func.func @test_fir_derivedtype_in_parallel() {
+ %livein = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) to varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+
+// -----
+
+func.func @test_fir_derivedtype_in_kernels() {
+ %livein = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
+ acc.terminator
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) to varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aggrvar"}
+
+// -----
+
+func.func @test_fir_array_in_parallel() {
+ %livein = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<!fir.array<10xf32>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<10xf32>>) to varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+
+// -----
+
+func.func @test_fir_array_in_kernels() {
+ %livein = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<!fir.array<10xf32>>
+ acc.terminator
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.array<10xf32>>) to varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "arrayvar"}
+
+// -----
+
+func.func @test_fir_derivedtype_in_parallel_defaultpresent() {
+ %livein = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
+ acc.yield
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {implicit = true, name = "aggrvar"}
+// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
+
+// -----
+
+func.func @test_fir_derivedtype_in_kernels_defaultpresent() {
+ %livein = fir.alloca !fir.type<_QFTaggr{field:f32}> {bindc_name = "aggrvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<!fir.type<_QFTaggr{field:f32}>>
+ acc.terminator
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) -> !fir.ref<!fir.type<_QFTaggr{field:f32}>> {implicit = true, name = "aggrvar"}
+// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.type<_QFTaggr{field:f32}>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "aggrvar"}
+
+// -----
+
+func.func @test_fir_array_in_parallel_defaultpresent() {
+ %livein = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<!fir.array<10xf32>>
+ acc.yield
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "arrayvar"}
+// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
+
+// -----
+
+func.func @test_fir_array_in_kernels_defaultpresent() {
+ %livein = fir.alloca !fir.array<10xf32> {bindc_name = "arrayvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<!fir.array<10xf32>>
+ acc.terminator
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "arrayvar"}
+// CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_present>, implicit = true, name = "arrayvar"}
+
+// -----
+
+func.func @test_fir_scalar_in_parallel_defaultpresent() {
+ %livein = fir.alloca f32 {bindc_name = "scalarvar"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<f32>
+ acc.yield
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {implicit = true, name = "scalarvar"}
+
+// -----
+
+func.func @test_fir_scalar_in_kernels_defaultpresent() {
+ %livein = fir.alloca f64 {bindc_name = "scalarvar"}
+ acc.kernels {
+ %load = fir.load %livein : !fir.ref<f64>
+ acc.terminator
+ } attributes {defaultAttr = #acc<defaultvalue present>}
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<f64>) to varPtr({{.*}} : !fir.ref<f64>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "scalarvar"}
+
+// -----
+
+func.func @test_fir_box_ref() {
+ %livein = fir.alloca !fir.box<!fir.array<?xi32>> {bindc_name = "descriptor"}
+ acc.parallel {
+ %load = fir.load %livein : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
+// CHECK: acc.copyout accPtr(%[[COPYIN]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) to varPtr({{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
+
+// -----
+
+func.func @test_fir_box_val() {
+ %desc = fir.alloca !fir.box<!fir.array<?xi32>> {bindc_name = "descriptor"}
+ %livein = fir.load %desc : !fir.ref<!fir.box<!fir.array<?xi32>>>
+ acc.parallel {
+ %addr = fir.box_addr %livein : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[COPYIN:.*]] = acc.copyin var({{.*}} : !fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
+// CHECK: acc.copyout accVar(%[[COPYIN]] : !fir.box<!fir.array<?xi32>>) to var({{.*}} : !fir.box<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "descriptor"}
+
+
+// -----
+
+// This test has an explicit data clause for the box - but the pointer held
+// inside the box is used in the region instead of the box itself. Test that
+// implicit present is actually used.
+func.func @test_explicit_box_implicit_ptr() {
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %arr = fir.alloca !fir.array<10xf32> {bindc_name = "aa"}
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+ %arr_decl = fir.declare %arr(%shape) {uniq_name = "aa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xf32>>
+ %box = fir.embox %arr_decl(%shape) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+ %copyin = acc.copyin var(%box : !fir.box<!fir.array<10xf32>>) -> !fir.box<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "aa"}
+ acc.serial dataOperands(%copyin : !fir.box<!fir.array<10xf32>>) {
+ // Use the pointer, not the box
+ %elem = fir.array_coor %arr_decl(%shape) %c1 : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+ acc.yield
+ }
+ acc.copyout accVar(%copyin : !fir.box<!fir.array<10xf32>>) to var(%box : !fir.box<!fir.array<10xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "aa"}
+ return
+}
+
+// CHECK: acc.present varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>){{.*}}-> !fir.ref<!fir.array<10xf32>> {implicit = true, name = "aa"}
+
+// -----
+
+// This test uses an explicit-shape array with no data clause - it also has
+// an optimization where the pointer is used instead of the boxed entity.
+// It tests that the implicit data pass is able to recover the size despite
+// it not being encoded in the FIR type.
+// It was generated from the following Fortran source:
+// subroutine array(aa,nn)
+// integer :: nn
+// real :: aa(10:nn)
+// !$acc kernels loop
+// do ii = 10, nn
+// aa(ii) = ii
+// end do
+// !$acc end kernels
+// end subroutine
+
+func.func @_QParray(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "aa"}, %arg1: !fir.ref<i32> {fir.bindc_name = "nn"}) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10_i64 = arith.constant 10 : i64
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFarrayEnn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ %4 = fir.convert %c10_i64 : (i64) -> index
+ %5 = fir.load %1 : !fir.ref<i32>
+ %6 = fir.convert %5 : (i32) -> i64
+ %7 = fir.convert %6 : (i64) -> index
+ %8 = arith.subi %7, %4 : index
+ %9 = arith.addi %8, %c1 : index
+ %10 = arith.cmpi sgt, %9, %c0 : index
+ %11 = arith.select %10, %9, %c0 : index
+ %12 = fir.shape_shift %4, %11 : (index, index) -> !fir.shapeshift<1>
+ %13 = fir.declare %arg0(%12) dummy_scope %0 {uniq_name = "_QFarrayEaa"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+ acc.kernels {
+ %elem = fir.array_coor %13(%12) %4 : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, index) -> !fir.ref<f32>
+ acc.terminator
+ }
+ return
+}
+
+// This tries to confirm that the acc.bounds operation is as expected.
+// Effectively the extent needs to be max(0, nn), stride needs to be 1,
+// adjusted lowerbound is 0, and actual language start index is 10.
+// CHECK: %[[NN:.*]] = fir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFarrayEnn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+// CHECK: %[[C10:.*]] = fir.convert %c10{{.*}} : (i64) -> index
+// CHECK: %[[LOADEDNN:.*]] = fir.load %[[NN]] : !fir.ref<i32>
+// CHECK: %[[CAST1:.*]] = fir.convert %[[LOADEDNN]] : (i32) -> i64
+// CHECK: %[[CAST2:.*]] = fir.convert %[[CAST1]] : (i64) -> index
+// CHECK: %[[SUBI:.*]] = arith.subi %[[CAST2]], %[[C10]] : index
+// CHECK: %[[ADDI:.*]] = arith.addi %[[SUBI]], %c1{{.*}} : index
+// CHECK: %[[CMPI:.*]] = arith.cmpi sgt, %[[ADDI]], %c0{{.*}} : index
+// CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[ADDI]], %c0{{.*}} : index
+// CHECK: %[[BOUNDS:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%[[SELECT]] : index) stride(%c1{{.*}} : index) startIdx(%[[C10]] : index)
+// CHECK: acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, implicit = true, name = "aa"}
+
+// -----
+
+// Test to confirm that a copyin clause is not implicitly generated for deviceptr symbol.
+func.func @test_deviceptr_no_implicit_copy() {
+ %c10 = arith.constant 10 : index
+ %arr = fir.alloca !fir.array<10xf64> {bindc_name = "a"}
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+ %arr_box = fir.embox %arr(%shape) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+ %devptr = acc.deviceptr var(%arr_box : !fir.box<!fir.array<10xf64>>) -> !fir.box<!fir.array<10xf64>> {name = "a"}
+ acc.parallel dataOperands(%devptr : !fir.box<!fir.array<10xf64>>) {
+ %elem = fir.box_addr %arr_box : (!fir.box<!fir.array<10xf64>>) -> !fir.ref<!fir.array<10xf64>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK-NOT: acc.copyin
+// CHECK: acc.deviceptr
+
+// -----
+
+// Test that acc.declare with deviceptr doesn't generate implicit copyin
+func.func @test_acc_declare_deviceptr() {
+ %c10 = arith.constant 10 : index
+ %arr = fir.alloca !fir.array<10xf64> {bindc_name = "a"}
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+ %arr_box = fir.embox %arr(%shape) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+ %devptr = acc.deviceptr var(%arr_box : !fir.box<!fir.array<10xf64>>) -> !fir.box<!fir.array<10xf64>> {name = "a"}
+ %token = acc.declare_enter dataOperands(%devptr : !fir.box<!fir.array<10xf64>>)
+ acc.parallel {
+ %elem = fir.box_addr %arr_box : (!fir.box<!fir.array<10xf64>>) -> !fir.ref<!fir.array<10xf64>>
+ acc.yield
+ }
+ acc.declare_exit token(%token)
+ return
+}
+
+// CHECK-LABEL: func.func @test_acc_declare_deviceptr
+// CHECK: acc.deviceptr
+// CHECK-NOT: acc.copyin
+// CHECK: acc.deviceptr
+
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir b/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir
new file mode 100644
index 000000000000..e4a7b8b18bc2
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir
@@ -0,0 +1,284 @@
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" -split-input-file | FileCheck %s
+
+// Test implicit firstprivate behavior for various scalar types in parallel and serial constructs.
+// Scalars in parallel/serial constructs should be implicitly firstprivate according to OpenACC spec.
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_i32 : !fir.ref<i32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca i32
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<i32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i32>, %[[DST:.*]]: !fir.ref<i32>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i32>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i32>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_i32_scalar_in_parallel
+func.func @test_i32_scalar_in_parallel() {
+ %scalar = fir.alloca i32 {bindc_name = "i32_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<i32>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i32_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_i32 -> %[[FIRSTPRIV]] : !fir.ref<i32>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_i64 : !fir.ref<i64> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<i64>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca i64
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<i64>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i64>, %[[DST:.*]]: !fir.ref<i64>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i64>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i64>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_i64_scalar_in_parallel
+func.func @test_i64_scalar_in_parallel() {
+ %scalar = fir.alloca i64 {bindc_name = "i64_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<i64>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "i64_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_i64 -> %[[FIRSTPRIV]] : !fir.ref<i64>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_f32 : !fir.ref<f32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca f32
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<f32>, %[[DST:.*]]: !fir.ref<f32>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<f32>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<f32>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_f32_scalar_in_parallel
+func.func @test_f32_scalar_in_parallel() {
+ %scalar = fir.alloca f32 {bindc_name = "f32_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<f32>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {implicit = true, name = "f32_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_f32 -> %[[FIRSTPRIV]] : !fir.ref<f32>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_f64 : !fir.ref<f64> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f64>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca f64
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<f64>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<f64>, %[[DST:.*]]: !fir.ref<f64>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<f64>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<f64>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_f64_scalar_in_parallel
+func.func @test_f64_scalar_in_parallel() {
+ %scalar = fir.alloca f64 {bindc_name = "f64_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<f64>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {implicit = true, name = "f64_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_f64 -> %[[FIRSTPRIV]] : !fir.ref<f64>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_l32 : !fir.ref<!fir.logical<4>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca !fir.logical<4>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<!fir.logical<4>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.logical<4>>, %[[DST:.*]]: !fir.ref<!fir.logical<4>>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.logical<4>>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<!fir.logical<4>>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_logical_scalar_in_parallel
+func.func @test_logical_scalar_in_parallel() {
+ %scalar = fir.alloca !fir.logical<4> {bindc_name = "logical_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<!fir.logical<4>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {implicit = true, name = "logical_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_l32 -> %[[FIRSTPRIV]] : !fir.ref<!fir.logical<4>>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_z32 : !fir.ref<complex<f32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca complex<f32>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<complex<f32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<complex<f32>>, %[[DST:.*]]: !fir.ref<complex<f32>>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f32>>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<complex<f32>>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_complex_scalar_in_parallel
+func.func @test_complex_scalar_in_parallel() {
+ %scalar = fir.alloca complex<f32> {bindc_name = "complex_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<complex<f32>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<complex<f32>>) -> !fir.ref<complex<f32>> {implicit = true, name = "complex_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_z32 -> %[[FIRSTPRIV]] : !fir.ref<complex<f32>>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_z64 : !fir.ref<complex<f64>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f64>>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca complex<f64>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<complex<f64>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<complex<f64>>, %[[DST:.*]]: !fir.ref<complex<f64>>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f64>>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<complex<f64>>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_complex8_scalar_in_parallel
+func.func @test_complex8_scalar_in_parallel() {
+ %scalar = fir.alloca complex<f64> {bindc_name = "complex8_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<complex<f64>>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<complex<f64>>) -> !fir.ref<complex<f64>> {implicit = true, name = "complex8_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_z64 -> %[[FIRSTPRIV]] : !fir.ref<complex<f64>>)
+
+// -----
+
+// Test with serial construct
+
+// CHECK-LABEL: func.func @test_i32_scalar_in_serial
+func.func @test_i32_scalar_in_serial() {
+ %scalar = fir.alloca i32 {bindc_name = "serial_i32_var"}
+ acc.serial {
+ %load = fir.load %scalar : !fir.ref<i32>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "serial_i32_var"}
+// CHECK: acc.serial firstprivate(@firstprivatization_ref_i32 -> %[[FIRSTPRIV]] : !fir.ref<i32>)
+
+// -----
+
+// Test with serial construct and f64
+
+// CHECK-LABEL: func.func @test_f64_scalar_in_serial
+func.func @test_f64_scalar_in_serial() {
+ %scalar = fir.alloca f64 {bindc_name = "serial_f64_var"}
+ acc.serial {
+ %load = fir.load %scalar : !fir.ref<f64>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<f64>) -> !fir.ref<f64> {implicit = true, name = "serial_f64_var"}
+// CHECK: acc.serial firstprivate(@firstprivatization_ref_f64 -> %[[FIRSTPRIV]] : !fir.ref<f64>)
+
+// -----
+
+// Test i8 and i16 scalar types
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_i8 : !fir.ref<i8> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<i8>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca i8
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<i8>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i8>, %[[DST:.*]]: !fir.ref<i8>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i8>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i8>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_i8_scalar_in_parallel
+func.func @test_i8_scalar_in_parallel() {
+ %scalar = fir.alloca i8 {bindc_name = "i8_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<i8>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<i8>) -> !fir.ref<i8> {implicit = true, name = "i8_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_i8 -> %[[FIRSTPRIV]] : !fir.ref<i8>)
+
+// -----
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_i16 : !fir.ref<i16> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<i16>):
+// CHECK: %[[ALLOC:.*]] = fir.alloca i16
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]]
+// CHECK: acc.yield %[[DECL]]#0 : !fir.ref<i16>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i16>, %[[DST:.*]]: !fir.ref<i16>):
+// CHECK: %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i16>
+// CHECK: fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i16>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @test_i16_scalar_in_parallel
+func.func @test_i16_scalar_in_parallel() {
+ %scalar = fir.alloca i16 {bindc_name = "i16_var"}
+ acc.parallel {
+ %load = fir.load %scalar : !fir.ref<i16>
+ acc.yield
+ }
+ return
+}
+
+// CHECK: %[[FIRSTPRIV:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<i16>) -> !fir.ref<i16> {implicit = true, name = "i16_var"}
+// CHECK: acc.parallel firstprivate(@firstprivatization_ref_i16 -> %[[FIRSTPRIV]] : !fir.ref<i16>)
+
diff --git a/flang/test/Transforms/debug-proc-ptr.fir b/flang/test/Transforms/debug-proc-ptr.fir
new file mode 100644
index 000000000000..296355778690
--- /dev/null
+++ b/flang/test/Transforms/debug-proc-ptr.fir
@@ -0,0 +1,41 @@
+// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s
+
+module {
+ func.func @_QQmain() attributes {fir.bindc_name = "test"} {
+ %0 = fir.alloca (!fir.ref<i32>) -> i32 {bindc_name = "fun_ptr", uniq_name = "_QFEfun_ptr"}
+ %1 = fircg.ext_declare %0 {uniq_name = "_QFEfun_ptr"} : (!fir.ref<(!fir.ref<i32>) -> i32>) -> !fir.ref<(!fir.ref<i32>) -> i32> loc(#loc1)
+
+ // Procedure pointer with no return: procedure(sub1), pointer :: sub_ptr
+ %2 = fir.alloca () -> () {bindc_name = "sub_ptr", uniq_name = "_QFEsub_ptr"}
+ %3 = fircg.ext_declare %2 {uniq_name = "_QFEsub_ptr"} : (!fir.ref<() -> ()>) -> !fir.ref<() -> ()> loc(#loc2)
+
+ // Procedure pointer with multiple args: procedure(func2), pointer :: func_ptr
+ %4 = fir.alloca (!fir.ref<i32>, !fir.ref<f64>) -> f32 {bindc_name = "func_ptr", uniq_name = "_QFEfunc_ptr"}
+ %5 = fircg.ext_declare %4 {uniq_name = "_QFEfunc_ptr"} : (!fir.ref<(!fir.ref<i32>, !fir.ref<f64>) -> f32>) -> !fir.ref<(!fir.ref<i32>, !fir.ref<f64>) -> f32> loc(#loc3)
+
+ return
+ } loc(#loc)
+}
+#loc = loc("test.f90":1:1)
+#loc1 = loc("test.f90":2:30)
+#loc2 = loc("test.f90":3:30)
+#loc3 = loc("test.f90":4:30)
+
+// CHECK-DAG: #[[INT:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
+// CHECK-DAG: #[[REAL32:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
+// CHECK-DAG: #[[REAL:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
+
+// CHECK-DAG: #[[PTR_INT:.*]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type{{.*}}baseType = #[[INT]]{{.*}}>
+// CHECK-DAG: #[[PTR_REAL:.*]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type{{.*}}baseType = #[[REAL]]{{.*}}>
+
+// CHECK-DAG: #[[SUB1:.*]] = #llvm.di_subroutine_type<types = #[[INT]], #[[PTR_INT]]>
+// CHECK-DAG: #[[PTR_SUB1:.*]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type{{.*}}baseType = #[[SUB1]]{{.*}}>
+// CHECK-DAG: #llvm.di_local_variable<{{.*}}name = "fun_ptr"{{.*}}type = #[[PTR_SUB1]]{{.*}}>
+
+// CHECK-DAG: #di_subroutine_type{{.*}} = #llvm.di_subroutine_type<types = #di_null_type>
+// CHECK-DAG: #di_local_variable{{.*}} = #llvm.di_local_variable<{{.*}}name = "sub_ptr"{{.*}}type = #di_derived_type{{.*}}>
+// CHECK-DAG: #di_derived_type{{.*}} = #llvm.di_derived_type<tag = DW_TAG_pointer_type{{.*}}baseType = #di_subroutine_type{{.*}}{{.*}}>
+
+// CHECK-DAG: #[[SUB3:.*]] = #llvm.di_subroutine_type<types = #[[REAL32]], #[[PTR_INT]], #[[PTR_REAL]]>
+// CHECK-DAG: #[[PTR_SUB3:.*]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type{{.*}}baseType = #[[SUB3]]{{.*}}>
+// CHECK-DAG: #llvm.di_local_variable<{{.*}}name = "func_ptr"{{.*}}type = #[[PTR_SUB3]]{{.*}}>
diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir
index b30a2fc4e9a8..5b0fd9f23d63 100644
--- a/flang/test/Transforms/omp-map-info-finalization.fir
+++ b/flang/test/Transforms/omp-map-info-finalization.fir
@@ -381,11 +381,10 @@ func.func @_QPrealtest(%arg0: !fir.boxchar<1>) {
// CHECK: %[[VAL_8:.*]]:2 = fir.unboxchar %[[VAL_4]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_8]]#1, %[[VAL_7]] : index
// CHECK: %[[VAL_10:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : index) upper_bound(%[[VAL_9]] : index) extent(%[[VAL_8]]#1 : index) stride(%[[VAL_7]] : index) start_idx(%[[VAL_6]] : index) {stride_in_bytes = true}
-// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.boxchar<1>>
// CHECK: %[[VAL_12:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.boxchar<1>>) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
-// CHECK: %[[VAL_13:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[VAL_12]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%[[VAL_10]]) -> !fir.ref<!fir.boxchar<1>>
-// CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%[[VAL_13]] : [0] : !fir.ref<!fir.boxchar<1>>) -> !fir.ref<!fir.boxchar<1>>
-// CHECK: omp.target map_entries(%[[VAL_14]] -> %[[VAL_15:.*]], %[[VAL_13]] -> %[[VAL_16:.*]] : !fir.ref<!fir.boxchar<1>>, !fir.ref<!fir.boxchar<1>>) private(@boxchar.privatizer %[[VAL_3]]#0 -> %[[VAL_17:.*]] [map_idx=0] : !fir.boxchar<1>) {
+// CHECK: %[[VAL_13:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.char<1,?>) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[VAL_12]] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) bounds(%[[VAL_10]]) -> !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>
+// CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%[[VAL_13]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) -> !fir.ref<!fir.boxchar<1>>
+// CHECK: omp.target map_entries(%[[VAL_14]] -> %[[VAL_15:.*]], %[[VAL_13]] -> %[[VAL_16:.*]] : !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<!fir.char<1,?>>>) private(@boxchar.privatizer %[[VAL_3]]#0 -> %[[VAL_17:.*]] [map_idx=0] : !fir.boxchar<1>) {
// CHECK: %[[VAL_18:.*]]:2 = fir.unboxchar %[[VAL_17]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
// CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]]#0 typeparams %[[VAL_18]]#1 {uniq_name = "tgt_a0"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
// CHECK: omp.terminator
diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index c09d4751d390..d76f3b16b30e 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -9,7 +9,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_X86_64)
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
elseif(LIBC_TARGET_ARCHITECTURE_IS_AARCH64)
- set(ALL_CPU_FEATURES "FullFP16")
+ set(ALL_CPU_FEATURES FullFP16 MOPS SVE SVE2)
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
endif()
diff --git a/libc/cmake/modules/cpu_features/check_MOPS.cpp b/libc/cmake/modules/cpu_features/check_MOPS.cpp
new file mode 100644
index 000000000000..314fe9b38bc8
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_MOPS.cpp
@@ -0,0 +1,5 @@
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_MOPS
+#error unsupported
+#endif
diff --git a/libc/cmake/modules/cpu_features/check_SVE.cpp b/libc/cmake/modules/cpu_features/check_SVE.cpp
new file mode 100644
index 000000000000..725f42f6eb88
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_SVE.cpp
@@ -0,0 +1,5 @@
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_SVE
+#error unsupported
+#endif
diff --git a/libc/cmake/modules/cpu_features/check_SVE2.cpp b/libc/cmake/modules/cpu_features/check_SVE2.cpp
new file mode 100644
index 000000000000..37f4b4fa038b
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_SVE2.cpp
@@ -0,0 +1,5 @@
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_SVE2
+#error unsupported
+#endif
diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index 935c95af0d4a..049adb34d9d7 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -269,9 +269,8 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.difftime
libc.src.time.gmtime
libc.src.time.gmtime_r
- # TODO: Re-enable these when tests aren't broken.
- # libc.src.time.localtime
- # libc.src.time.localtime_r
+ libc.src.time.localtime
+ libc.src.time.localtime_r
libc.src.time.mktime
libc.src.time.strftime
libc.src.time.strftime_l
@@ -321,6 +320,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.fenv.feupdateenv
# math.h entrypoints
+ libc.src.math.acos
libc.src.math.acosf
libc.src.math.acoshf
libc.src.math.asinf
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 82e257c1d2b0..2444ec5feff0 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -269,6 +269,8 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.difftime
libc.src.time.gmtime
libc.src.time.gmtime_r
+ libc.src.time.localtime
+ libc.src.time.localtime_r
libc.src.time.mktime
libc.src.time.strftime
libc.src.time.strftime_l
@@ -321,6 +323,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.fenv.feupdateenv
# math.h entrypoints
+ libc.src.math.acos
libc.src.math.acosf
libc.src.math.acoshf
libc.src.math.asinf
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index c10cc1162cc5..a6aef96e9169 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -269,6 +269,8 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.difftime
libc.src.time.gmtime
libc.src.time.gmtime_r
+ libc.src.time.localtime
+ libc.src.time.localtime_r
libc.src.time.mktime
libc.src.time.strftime
libc.src.time.strftime_l
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index fc6099ca6ccc..1fe20d9b23a3 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -18,6 +18,18 @@
#define LIBC_TARGET_CPU_HAS_FULLFP16
#endif
+#if defined(__ARM_FEATURE_SVE)
+#define LIBC_TARGET_CPU_HAS_SVE
+#endif
+
+#if defined(__ARM_FEATURE_SVE2)
+#define LIBC_TARGET_CPU_HAS_SVE2
+#endif
+
+#if defined(__ARM_FEATURE_MOPS)
+#define LIBC_TARGET_CPU_HAS_MOPS
+#endif
+
#if defined(__SSE2__)
#define LIBC_TARGET_CPU_HAS_SSE2
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
diff --git a/libc/src/time/baremetal/CMakeLists.txt b/libc/src/time/baremetal/CMakeLists.txt
index cbe9cf3db3e2..7a5bad3311cd 100644
--- a/libc/src/time/baremetal/CMakeLists.txt
+++ b/libc/src/time/baremetal/CMakeLists.txt
@@ -24,11 +24,11 @@ add_entrypoint_object(
localtime
SRCS
localtime.cpp
+ ../time_utils.cpp
HDRS
../localtime.h
- time_utils.h
+ ../time_utils.h
DEPENDS
- .time_utils
libc.hdr.types.struct_tm
libc.hdr.types.time_t
)
@@ -37,11 +37,11 @@ add_entrypoint_object(
localtime_r
SRCS
localtime_r.cpp
+ ../time_utils.cpp
HDRS
../localtime.h
- time_utils.h
+ ../time_utils.h
DEPENDS
- .time_utils
libc.hdr.types.struct_tm
libc.hdr.types.time_t
)
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 0eb373c3fa06..42e8faa3fd69 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -187,6 +187,7 @@ add_header_library(
DEPENDS
libc.src.__support.CPP.type_traits
libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.macros.properties.architectures
)
add_libc_test(
diff --git a/libc/test/src/stdlib/StrfromTest.h b/libc/test/src/stdlib/StrfromTest.h
index fd2e0f120e90..3dacfca9e89f 100644
--- a/libc/test/src/stdlib/StrfromTest.h
+++ b/libc/test/src/stdlib/StrfromTest.h
@@ -8,6 +8,7 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/properties/architectures.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/ErrnoSetterMatcher.h"
#include "test/UnitTest/Test.h"
@@ -484,7 +485,9 @@ public:
ASSERT_STREQ_LEN(written, buff, "-NAN");
}
+ // https://github.com/llvm/llvm-project/issues/166795
void charsWrittenOverflow(FunctionT func) {
+#ifndef LIBC_TARGET_ARCH_IS_RISCV32
char buff[100];
// Trigger an overflow in the return value of strfrom by writing more than
// INT_MAX bytes.
@@ -492,6 +495,7 @@ public:
EXPECT_LT(result, 0);
ASSERT_ERRNO_FAILURE();
+#endif
}
};
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 58e0ee999306..a6a0ac8670fb 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -66,8 +66,8 @@ Improvements and New Features
by up to 2.5x
- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
-- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
- in chunks into a buffer.
+- ``ofstream::write`` and ``ifstream::read`` have been optimized to pass through large reads and writes to system calls
+ directly instead of copying them in chunks.
- Multiple internal types have been refactored to use ``[[no_unique_address]]``, resulting in faster compile times and
reduced debug information.
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 57032ce26d4f..46e17b584432 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -262,6 +262,7 @@ set(files
__chrono/gps_clock.h
__chrono/hh_mm_ss.h
__chrono/high_resolution_clock.h
+ __chrono/is_clock.h
__chrono/leap_second.h
__chrono/literals.h
__chrono/local_info.h
diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 328ebb663376..37732cc22afd 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -15,6 +15,7 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
+#include <__type_traits/is_same.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -27,6 +28,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Sentinel, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
__fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __value) {
+#ifndef _LIBCPP_CXX03_LANG
+ if constexpr (is_same<_ForwardIterator, _Sentinel>::value && __is_segmented_iterator_v<_ForwardIterator>) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_ForwardIterator>::__local_iterator;
+ std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__fill(__lfirst, __llast, __value);
+ });
+ return __last;
+ }
+#endif
for (; __first != __last; ++__first)
*__first = __value;
return __first;
@@ -42,18 +52,6 @@ __fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& _
return std::__fill_n(__first, __last - __first, __value);
}
-#ifndef _LIBCPP_CXX03_LANG
-template <class _SegmentedIterator, class _Tp, __enable_if_t<__is_segmented_iterator_v<_SegmentedIterator>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-_SegmentedIterator __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
- using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
- std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__fill(__lfirst, __llast, __value);
- });
- return __last;
-}
-#endif // !_LIBCPP_CXX03_LANG
-
template <class _ForwardIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index 2bfacf3178c4..426fe228bdab 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -16,10 +16,6 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__memory/pointer_traits.h>
-#include <__type_traits/conjunction.h>
-#include <__type_traits/enable_if.h>
-#include <__type_traits/integral_constant.h>
-#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -33,39 +29,24 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
-template <class _OutputIterator,
- class _Size,
- class _Tp
-#ifndef _LIBCPP_CXX03_LANG
- ,
- __enable_if_t<!_And<_BoolConstant<__is_segmented_iterator_v<_OutputIterator>>,
- __has_random_access_local_iterator<_OutputIterator>>::value,
- int> = 0
-#endif
- >
+template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
+#ifndef _LIBCPP_CXX03_LANG
+ if constexpr (__is_segmented_iterator_v<_OutputIterator>) {
+ using __local_iterator = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator;
+ if constexpr (__has_random_access_iterator_category<__local_iterator>::value) {
+ return std::__for_each_n_segment(__first, __n, [&](__local_iterator __lfirst, __local_iterator __llast) {
+ std::__fill_n(__lfirst, __llast - __lfirst, __value);
+ });
+ }
+ }
+#endif
for (; __n > 0; ++__first, (void)--__n)
*__first = __value;
return __first;
}
-#ifndef _LIBCPP_CXX03_LANG
-template < class _OutputIterator,
- class _Size,
- class _Tp,
- __enable_if_t<_And<_BoolConstant<__is_segmented_iterator_v<_OutputIterator>>,
- __has_random_access_local_iterator<_OutputIterator>>::value,
- int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX14 _OutputIterator __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
- using __local_iterator_t = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator;
- return std::__for_each_n_segment(__first, __n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__fill_n(__lfirst, __llast - __lfirst, __value);
- });
-}
-#endif // !_LIBCPP_CXX03_LANG
-
template <bool _FillVal, class _Cp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
__fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) {
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index 6fb66d25a246..cb26aa4d2656 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -14,8 +14,8 @@
#include <__config>
#include <__functional/identity.h>
#include <__iterator/segmented_iterator.h>
-#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
+#include <__type_traits/is_same.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -25,27 +25,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Sent, class _Func, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
-__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) {
+__for_each(_InputIterator __first, _Sent __last, _Func& __func, _Proj& __proj) {
+#ifndef _LIBCPP_CXX03_LANG
+ if constexpr (is_same<_InputIterator, _Sent>::value && __is_segmented_iterator_v<_InputIterator>) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_InputIterator>::__local_iterator;
+ std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__for_each(__lfirst, __llast, __func, __proj);
+ });
+ return __last;
+ }
+#endif
for (; __first != __last; ++__first)
- std::__invoke(__f, std::__invoke(__proj, *__first));
+ std::__invoke(__func, std::__invoke(__proj, *__first));
return __first;
}
-#ifndef _LIBCPP_CXX03_LANG
-template <class _SegmentedIterator,
- class _Func,
- class _Proj,
- __enable_if_t<__is_segmented_iterator_v<_SegmentedIterator>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
-__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) {
- using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
- std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__for_each(__lfirst, __llast, __func, __proj);
- });
- return __last;
-}
-#endif // !_LIBCPP_CXX03_LANG
-
template <class _InputIterator, class _Func>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func
for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index 04650e15b636..72c7adb093f9 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -16,10 +16,7 @@
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
-#include <__type_traits/disjunction.h>
-#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
-#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>
#include <__utility/move.h>
@@ -32,57 +29,33 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _InputIterator,
- class _Size,
- class _Func,
- class _Proj,
- __enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
- _Or<integral_constant<bool, !__is_segmented_iterator_v<_InputIterator> >,
- _Not<__has_random_access_local_iterator<_InputIterator> > >::value,
- int> = 0>
+template <class _InputIterator, class _Size, class _Func, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
_IntegralSize __n = __orig_n;
- while (__n > 0) {
- std::__invoke(__f, std::__invoke(__proj, *__first));
- ++__first;
- --__n;
- }
- return std::move(__first);
-}
-
-template <class _RandIter,
- class _Size,
- class _Func,
- class _Proj,
- __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
-__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
- typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
- auto __last = __first + __n;
- std::__for_each(__first, __last, __f, __proj);
- return __last;
-}
#ifndef _LIBCPP_CXX03_LANG
-template <class _SegmentedIterator,
- class _Size,
- class _Func,
- class _Proj,
- __enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
- __is_segmented_iterator_v<_SegmentedIterator> &&
- __has_random_access_iterator_category<
- typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
- int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
-__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
- using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
- return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__for_each(__lfirst, __llast, __f, __proj);
- });
+ if constexpr (__is_segmented_iterator_v<_InputIterator>) {
+ using __local_iterator = typename __segmented_iterator_traits<_InputIterator>::__local_iterator;
+ if constexpr (__has_random_access_iterator_category<__local_iterator>::value) {
+ return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator __lfirst, __local_iterator __llast) {
+ std::__for_each(__lfirst, __llast, __f, __proj);
+ });
+ } else {
+ return std::__for_each(__first, __first + __n, __f, __proj);
+ }
+ } else
+#endif
+ {
+ while (__n > 0) {
+ std::__invoke(__f, std::__invoke(__proj, *__first));
+ ++__first;
+ --__n;
+ }
+ return std::move(__first);
+ }
}
-#endif // !_LIBCPP_CXX03_LANG
#if _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__chrono/is_clock.h b/libcxx/include/__chrono/is_clock.h
new file mode 100644
index 000000000000..e63b8485d06e
--- /dev/null
+++ b/libcxx/include/__chrono/is_clock.h
@@ -0,0 +1,72 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHRONO_IS_CLOCK_H
+#define _LIBCPP___CHRONO_IS_CLOCK_H
+
+#include <__config>
+
+#include <__chrono/duration.h>
+#include <__chrono/time_point.h>
+#include <__concepts/same_as.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_arithmetic.h>
+#include <__type_traits/is_class.h>
+#include <__type_traits/is_union.h>
+#include <ratio>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 20
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace chrono {
+
+// Helper to check that _Tp::time_point has the form time_point<_, typename _Tp::duration>.
+template <class _TimePoint, class _ClockType>
+inline constexpr bool __is_valid_clock_time_point_v = false;
+
+template <class _TimePointClock, class _ClockType>
+inline constexpr bool
+ __is_valid_clock_time_point_v<time_point<_TimePointClock, typename _ClockType::duration>, _ClockType> = true;
+
+// Check if a clock satisfies the Cpp17Clock requirements as defined in [time.clock.req]
+template <class _Tp>
+_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_clock_v = requires {
+ typename _Tp::rep;
+ requires is_arithmetic_v<typename _Tp::rep> || is_class_v<typename _Tp::rep> || is_union_v<typename _Tp::rep>;
+
+ typename _Tp::period;
+ requires __is_ratio_v<typename _Tp::period>;
+
+ typename _Tp::duration;
+ requires same_as<typename _Tp::duration, duration<typename _Tp::rep, typename _Tp::period>>;
+
+ typename _Tp::time_point;
+ requires __is_valid_clock_time_point_v<typename _Tp::time_point, _Tp>;
+
+ _Tp::is_steady;
+ requires same_as<decltype((_Tp::is_steady)), const bool&>;
+
+ _Tp::now();
+ requires same_as<decltype(_Tp::now()), typename _Tp::time_point>;
+};
+
+template <class _Tp>
+struct _LIBCPP_NO_SPECIALIZATIONS is_clock : bool_constant<is_clock_v<_Tp>> {};
+
+} // namespace chrono
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER
+#endif // _LIBCPP___CHRONO_IS_CLOCK_H
diff --git a/libcxx/include/__iterator/distance.h b/libcxx/include/__iterator/distance.h
index 9be9db0f0c70..1a9fbf27f776 100644
--- a/libcxx/include/__iterator/distance.h
+++ b/libcxx/include/__iterator/distance.h
@@ -11,6 +11,7 @@
#define _LIBCPP___ITERATOR_DISTANCE_H
#include <__algorithm/for_each_segment.h>
+#include <__concepts/same_as.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
@@ -41,35 +42,29 @@ template <class _Iter>
using __iter_distance_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
#endif
-template <class _InputIter, class _Sent>
-inline _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter> __distance(_InputIter __first, _Sent __last) {
- __iter_distance_t<_InputIter> __r(0);
- for (; __first != __last; ++__first)
- ++__r;
- return __r;
-}
-
template <class _RandIter, __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_RandIter>
__distance(_RandIter __first, _RandIter __last) {
return __last - __first;
}
+template <class _InputIter, class _Sent>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter>
+__distance(_InputIter __first, _Sent __last) {
+ __iter_distance_t<_InputIter> __r(0);
#if _LIBCPP_STD_VER >= 20
-template <class _SegmentedIter,
- __enable_if_t<!__has_random_access_iterator_category<_SegmentedIter>::value &&
- __is_segmented_iterator_v<_SegmentedIter>,
- int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_SegmentedIter>
-__distance(_SegmentedIter __first, _SegmentedIter __last) {
- __iter_distance_t<_SegmentedIter> __r(0);
- std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
- __r += std::__distance(__lfirst, __llast);
- });
+ if constexpr (same_as<_InputIter, _Sent> && __is_segmented_iterator_v<_InputIter>) {
+ std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
+ __r += std::__distance(__lfirst, __llast);
+ });
+ } else
+#endif
+ {
+ for (; __first != __last; ++__first)
+ ++__r;
+ }
return __r;
}
-#endif // _LIBCPP_STD_VER >= 20
template <class _InputIter>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
diff --git a/libcxx/include/__iterator/segmented_iterator.h b/libcxx/include/__iterator/segmented_iterator.h
index 5df973713710..dc56a740130b 100644
--- a/libcxx/include/__iterator/segmented_iterator.h
+++ b/libcxx/include/__iterator/segmented_iterator.h
@@ -75,11 +75,6 @@ inline const bool __has_specialization_v<_Tp, sizeof(_Tp) * 0> = true;
template <class _Iterator>
inline const bool __is_segmented_iterator_v = __has_specialization_v<__segmented_iterator_traits<_Iterator> >;
-template <class _SegmentedIterator>
-struct __has_random_access_local_iterator
- : __has_random_access_iterator_category<
- typename __segmented_iterator_traits< _SegmentedIterator >::__local_iterator > {};
-
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___SEGMENTED_ITERATOR_H
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 82e99a31bcc9..aa4fc6218f96 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -218,6 +218,9 @@ template <class ToDuration, class Rep, class Period>
template <class ToDuration, class Rep, class Period>
constexpr ToDuration round(const duration<Rep, Period>& d); // C++17
+template <class T> struct is_clock; // C++20
+template <class T> inline constexpr bool is_clock_v = is_clock<T>::value; // C++20
+
// duration I/O
template<class charT, class traits, class Rep, class Period> // C++20
basic_ostream<charT, traits>&
@@ -1057,6 +1060,7 @@ constexpr chrono::year operator ""y(unsigned lo
# include <__chrono/day.h>
# include <__chrono/exception.h>
# include <__chrono/hh_mm_ss.h>
+# include <__chrono/is_clock.h>
# include <__chrono/literals.h>
# include <__chrono/local_info.h>
# include <__chrono/month.h>
diff --git a/libcxx/include/fstream b/libcxx/include/fstream
index 1f88d134fe06..b07ca636094a 100644
--- a/libcxx/include/fstream
+++ b/libcxx/include/fstream
@@ -308,6 +308,19 @@ protected:
return basic_streambuf<_CharT, _Traits>::xsputn(__str, __len);
}
+ _LIBCPP_HIDE_FROM_ABI_VIRTUAL streamsize xsgetn(char_type* __str, streamsize __len) override {
+ if (__always_noconv_) {
+ const streamsize __n = std::min(this->egptr() - this->gptr(), __len);
+ if (__n != 0) {
+ traits_type::copy(__str, this->gptr(), __n);
+ this->__gbump_ptrdiff(__n);
+ }
+ if (__len - __n >= this->egptr() - this->eback())
+ return std::fread(__str + __n, sizeof(char_type), __len - __n, __file_);
+ }
+ return basic_streambuf<_CharT, _Traits>::xsgetn(__str, __len);
+ }
+
private:
char* __extbuf_;
const char* __extbufnext_;
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 24a2fe761943..f77c885da5b6 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -973,6 +973,10 @@ module std [system] {
header "__chrono/high_resolution_clock.h"
export *
}
+ module is_clock {
+ header "__chrono/is_clock.h"
+ export std_core.type_traits.integral_constant
+ }
module leap_second {
header "__chrono/leap_second.h"
}
diff --git a/libcxx/modules/std/chrono.inc b/libcxx/modules/std/chrono.inc
index 66eccd8d290a..db405d482bf9 100644
--- a/libcxx/modules/std/chrono.inc
+++ b/libcxx/modules/std/chrono.inc
@@ -25,8 +25,8 @@ export namespace std {
using std::chrono::duration_values;
- // using std::chrono::is_clock;
- // using std::chrono::is_clock_v;
+ using std::chrono::is_clock;
+ using std::chrono::is_clock_v;
// [time.duration.nonmember], duration arithmetic
using std::chrono::operator+;
diff --git a/libcxx/test/benchmarks/streams/ofstream.bench.cpp b/libcxx/test/benchmarks/streams/fstream.bench.cpp
index 60606a9d67e2..3ca1801ca8d0 100644
--- a/libcxx/test/benchmarks/streams/ofstream.bench.cpp
+++ b/libcxx/test/benchmarks/streams/fstream.bench.cpp
@@ -11,7 +11,7 @@
#include <benchmark/benchmark.h>
-static void bm_write(benchmark::State& state) {
+static void bm_ofstream_write(benchmark::State& state) {
std::vector<char> buffer;
buffer.resize(16384);
@@ -20,6 +20,24 @@ static void bm_write(benchmark::State& state) {
for (auto _ : state)
stream.write(buffer.data(), buffer.size());
}
-BENCHMARK(bm_write);
+BENCHMARK(bm_ofstream_write);
+
+static void bm_ifstream_read(benchmark::State& state) {
+ std::vector<char> buffer;
+ buffer.resize(16384);
+
+ std::ofstream gen_testfile("testfile");
+ gen_testfile.write(buffer.data(), buffer.size());
+
+ std::ifstream stream("testfile");
+ assert(stream);
+
+ for (auto _ : state) {
+ stream.read(buffer.data(), buffer.size());
+ benchmark::DoNotOptimize(buffer);
+ stream.seekg(0);
+ }
+}
+BENCHMARK(bm_ifstream_read);
BENCHMARK_MAIN();
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp
index 283adbc057d1..30e7b66d4232 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/filebuf/traits_mismatch.verify.cpp
@@ -19,4 +19,4 @@
std::basic_filebuf<char, std::char_traits<wchar_t> > f;
// expected-error-re@*:* {{static assertion failed{{.*}}traits_type::char_type must be the same type as CharT}}
-// expected-error@*:* 10 {{only virtual member functions can be marked 'override'}}
+// expected-error@*:* 11 {{only virtual member functions can be marked 'override'}}
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp
index ba6f3c31d3e3..daafb36f9151 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/traits_mismatch.verify.cpp
@@ -21,7 +21,7 @@ std::basic_fstream<char, std::char_traits<wchar_t> > f;
// expected-error-re@*:* {{static assertion failed{{.*}}traits_type::char_type must be the same type as CharT}}
// expected-error-re@*:* {{static assertion failed{{.*}}traits_type::char_type must be the same type as CharT}}
-// expected-error@*:* 12 {{only virtual member functions can be marked 'override'}}
+// expected-error@*:* 13 {{only virtual member functions can be marked 'override'}}
// FIXME: As of commit r324062 Clang incorrectly generates a diagnostic about mismatching
// exception specifications for types which are already invalid for one reason or another.
diff --git a/libcxx/test/libcxx/time/time.traits/is.clock.verify.cpp b/libcxx/test/libcxx/time/time.traits/is.clock.verify.cpp
new file mode 100644
index 000000000000..f4f438d348a8
--- /dev/null
+++ b/libcxx/test/libcxx/time/time.traits/is.clock.verify.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: std-at-least-c++20
+
+// <chrono>
+//
+// template<class T> struct is_clock;
+// template<class T> constexpr bool is_clock_v = is_clock<T>::value;
+
+// [time.traits.is.clock]/3:
+// The behavior of a program that adds specializations for is_clock is undefined.
+
+// [namespace.std]/3:
+// The behavior of a C++ program is undefined if it declares an explicit or partial specialization of any standard
+// library variable template, except where explicitly permitted by the specification of that variable template.
+
+#include <chrono>
+#include <ratio>
+
+#if !__has_warning("-Winvalid-specializations")
+// expected-no-diagnostics
+#else
+
+template <>
+struct std::chrono::is_clock<int> : std::false_type {}; // expected-error@*:* {{'is_clock' cannot be specialized}}
+
+template <>
+constexpr bool std::chrono::is_clock_v<float> = false; // expected-error@*:* {{'is_clock_v' cannot be specialized}}
+
+#endif
diff --git a/libcxx/test/std/time/time.traits/is.clock.compile.pass.cpp b/libcxx/test/std/time/time.traits/is.clock.compile.pass.cpp
new file mode 100644
index 000000000000..4af29d20943e
--- /dev/null
+++ b/libcxx/test/std/time/time.traits/is.clock.compile.pass.cpp
@@ -0,0 +1,230 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: std-at-least-c++20
+
+// <chrono>
+//
+// template<class T> struct is_clock;
+// template<class T> constexpr bool is_clock_v = is_clock<T>::value;
+
+#include <chrono>
+#include <ratio>
+
+#include "test_macros.h"
+
+struct EmptyStruct {};
+
+// Test structs missing required members
+struct MissingRep {
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<MissingRep>;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+struct MissingPeriod {
+ using rep = long;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<MissingPeriod>;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+struct MissingDuration {
+ using rep = long;
+ using time_point = long;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+struct MissingTimePoint {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ static constexpr bool is_steady = false;
+ static std::chrono::time_point<MissingTimePoint> now();
+};
+
+struct MissingIsSteady {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<MissingIsSteady>;
+ static time_point now();
+};
+
+struct MissingNow {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<MissingNow>;
+ static constexpr bool is_steady = false;
+};
+
+// Valid clock types
+struct ValidSteadyClock {
+ using rep = long long;
+ using period = std::nano;
+ using duration = std::chrono::nanoseconds;
+ using time_point = std::chrono::time_point<ValidSteadyClock>;
+ static constexpr bool is_steady = true;
+ static time_point now();
+};
+
+struct ValidSystemClock {
+ using rep = long long;
+ using period = std::micro;
+ using duration = std::chrono::microseconds;
+ using time_point = std::chrono::time_point<ValidSystemClock>;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+// Test clocks with invalid is_steady type
+struct WrongIsSteadyType {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<WrongIsSteadyType>;
+ static bool is_steady; // Not const bool
+ static time_point now();
+};
+
+struct WrongIsSteadyNonBool {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<WrongIsSteadyNonBool>;
+ static constexpr int is_steady = 1; // Not bool
+ static time_point now();
+};
+
+// Test clocks with invalid now() return type
+struct WrongNowReturnType {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<WrongNowReturnType>;
+ static constexpr bool is_steady = false;
+ static int now(); // Wrong return type
+};
+
+// Test clocks with invalid period type
+struct WrongPeriodType {
+ using rep = long;
+ using period = int; // Not a ratio
+ using duration = std::chrono::seconds;
+ using time_point = std::chrono::time_point<WrongPeriodType>;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+// Test clocks with wrong duration type
+struct WrongDurationType {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::milliseconds; // Should be duration<long, ratio<1>>
+ using time_point = std::chrono::time_point<WrongDurationType>;
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+// Test clocks with wrong time_point type
+struct WrongTimePointType {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::duration<long, std::ratio<1>>;
+ using time_point = int; // Not a time_point
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+struct WrongTimePointClock {
+ using rep = long;
+ using period = std::ratio<1>;
+ using duration = std::chrono::duration<long, std::ratio<1>>;
+ using time_point = std::chrono::time_point<ValidSystemClock>; // Wrong clock type
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+// Valid clock with time_point that has matching duration instead of matching clock
+struct ValidClockWithDurationMatch {
+ using rep = int;
+ using period = std::milli;
+ using duration = std::chrono::duration<int, std::milli>;
+ using time_point = std::chrono::time_point<ValidSystemClock, duration>; // Valid: matches duration
+ static constexpr bool is_steady = false;
+ static time_point now();
+};
+
+// Test both is_clock and is_clock_v
+static_assert(std::chrono::is_clock<std::chrono::system_clock>::value);
+static_assert(std::chrono::is_clock_v<std::chrono::system_clock>);
+
+// Test standard clock types
+static_assert(std::chrono::is_clock_v<std::chrono::system_clock>);
+static_assert(std::chrono::is_clock_v<std::chrono::high_resolution_clock>);
+
+// Test non-clock types
+static_assert(!std::chrono::is_clock_v<EmptyStruct>);
+static_assert(!std::chrono::is_clock_v<int>);
+static_assert(!std::chrono::is_clock_v<void>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock::time_point>);
+static_assert(!std::chrono::is_clock_v<std::chrono::seconds>);
+static_assert(!std::chrono::is_clock_v<std::chrono::milliseconds>);
+
+// Test structs missing required members
+static_assert(!std::chrono::is_clock_v<MissingRep>);
+static_assert(!std::chrono::is_clock_v<MissingPeriod>);
+static_assert(!std::chrono::is_clock_v<MissingDuration>);
+static_assert(!std::chrono::is_clock_v<MissingTimePoint>);
+static_assert(!std::chrono::is_clock_v<MissingIsSteady>);
+static_assert(!std::chrono::is_clock_v<MissingNow>);
+
+// Test valid custom clocks
+static_assert(std::chrono::is_clock_v<ValidSteadyClock>);
+static_assert(std::chrono::is_clock_v<ValidSystemClock>);
+static_assert(std::chrono::is_clock_v<ValidClockWithDurationMatch>);
+
+// cv-qualified and reference types
+static_assert(std::chrono::is_clock_v<const std::chrono::system_clock>);
+static_assert(std::chrono::is_clock_v<volatile std::chrono::system_clock>);
+static_assert(std::chrono::is_clock_v<const volatile std::chrono::system_clock>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock&>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock&&>);
+static_assert(!std::chrono::is_clock_v<const std::chrono::system_clock&>);
+
+// array and pointer types
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock[]>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock[10]>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock*>);
+static_assert(!std::chrono::is_clock_v<std::chrono::system_clock* const>);
+
+// The Standard defined a minimum set of checks and allowed implementation to perform stricter checks. The following
+// static asserts are implementation specific and a conforming standard library implementation doesn't have to produce
+// the same outcome.
+
+// Test clocks with invalid is_steady type
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongIsSteadyType>); // is_steady not const bool
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongIsSteadyNonBool>); // is_steady not bool type
+
+// Test clocks with invalid now() return type
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongNowReturnType>); // now() doesn't return time_point
+
+// Test clocks with invalid period type
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongPeriodType>); // period is not a ratio
+
+// Test clocks with wrong duration type
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongDurationType>); // duration doesn't match duration<rep, period>
+
+// Test clocks with wrong time_point type
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongTimePointType>); // time_point is not a time_point
+LIBCPP_STATIC_ASSERT(!std::chrono::is_clock_v<WrongTimePointClock>); // time_point has wrong clock and wrong duration
diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 6f27da7b9cad..b999438ff2ca 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -1366,7 +1366,7 @@ public:
template <typename Fn> void match(Fn F) const { F(Name, Params, Requires); }
void printLeft(OutputBuffer &OB) const override {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "template<";
Params.printWithComma(OB);
OB += "> typename ";
@@ -1550,7 +1550,7 @@ public:
NodeArray getParams() { return Params; }
void printLeft(OutputBuffer &OB) const override {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
Params.printWithComma(OB);
OB += ">";
@@ -1824,7 +1824,7 @@ public:
void printDeclarator(OutputBuffer &OB) const {
if (!TemplateParams.empty()) {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
TemplateParams.printWithComma(OB);
OB += ">";
@@ -1885,7 +1885,9 @@ public:
}
void printLeft(OutputBuffer &OB) const override {
- bool ParenAll = OB.isGtInsideTemplateArgs() &&
+ // If we're printing a '<' inside of a template argument, and we haven't
+ // yet parenthesized the expression, do so now.
+ bool ParenAll = !OB.isInParensInTemplateArgs() &&
(InfixOperator == ">" || InfixOperator == ">>");
if (ParenAll)
OB.printOpen();
@@ -2061,7 +2063,7 @@ public:
void printLeft(OutputBuffer &OB) const override {
OB += CastKind;
{
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
OB.printLeft(*To);
OB += ">";
diff --git a/libcxxabi/src/demangle/Utility.h b/libcxxabi/src/demangle/Utility.h
index 76243f5d3280..df5b54dca492 100644
--- a/libcxxabi/src/demangle/Utility.h
+++ b/libcxxabi/src/demangle/Utility.h
@@ -104,18 +104,32 @@ public:
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
- /// When zero, we're printing template args and '>' needs to be parenthesized.
- /// Use a counter so we can simply increment inside parentheses.
- unsigned GtIsGt = 1;
+ struct {
+ /// The depth of '(' and ')' inside the currently printed template
+ /// arguments.
+ unsigned ParenDepth = 0;
- bool isGtInsideTemplateArgs() const { return GtIsGt == 0; }
+ /// True if we're currently printing a template argument.
+ bool InsideTemplate = false;
+ } TemplateTracker;
+
+ /// Returns true if we're currently between a '(' and ')' when printing
+ /// template args.
+ bool isInParensInTemplateArgs() const {
+ return TemplateTracker.ParenDepth > 0;
+ }
+
+ /// Returns true if we're printing template args.
+ bool isInsideTemplateArgs() const { return TemplateTracker.InsideTemplate; }
void printOpen(char Open = '(') {
- GtIsGt++;
+ if (isInsideTemplateArgs())
+ TemplateTracker.ParenDepth++;
*this += Open;
}
void printClose(char Close = ')') {
- GtIsGt--;
+ if (isInsideTemplateArgs())
+ TemplateTracker.ParenDepth--;
*this += Close;
}
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 5f4b0902d522..97edff0b87ea 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -332,6 +332,10 @@ if (C_SUPPORTS_COMMENT_LIB_PRAGMA)
endif()
endif()
+if (RUNTIMES_EXECUTE_ONLY_CODE)
+ add_compile_definitions(_LIBUNWIND_EXECUTE_ONLY_CODE)
+endif()
+
#===============================================================================
# Setup Source Code
#===============================================================================
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index 198735fa800a..fd306ed8c523 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -18,6 +18,8 @@
#if defined(_AIX)
.toc
+#elif defined(__aarch64__) && defined(__ELF__) && defined(_LIBUNWIND_EXECUTE_ONLY_CODE)
+ .section .text,"axy",@progbits,unique,0
#else
.text
#endif
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index 619a59751151..b7ddd0a621d1 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -18,6 +18,8 @@
#if defined(_AIX)
.toc
+#elif defined(__aarch64__) && defined(__ELF__) && defined(_LIBUNWIND_EXECUTE_ONLY_CODE)
+ .section .text,"axy",@progbits,unique,0
#else
.text
#endif
diff --git a/lldb/bindings/python/CMakeLists.txt b/lldb/bindings/python/CMakeLists.txt
index ef6def3f2687..28a8af8f0631 100644
--- a/lldb/bindings/python/CMakeLists.txt
+++ b/lldb/bindings/python/CMakeLists.txt
@@ -107,6 +107,7 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar
"plugins"
FILES
"${LLDB_SOURCE_DIR}/examples/python/templates/parsed_cmd.py"
+ "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_frame_provider.py"
"${LLDB_SOURCE_DIR}/examples/python/templates/scripted_process.py"
"${LLDB_SOURCE_DIR}/examples/python/templates/scripted_platform.py"
"${LLDB_SOURCE_DIR}/examples/python/templates/operating_system.py"
diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig
index 3ea24f1a3141..a86dc44ce410 100644
--- a/lldb/bindings/python/python-swigsafecast.swig
+++ b/lldb/bindings/python/python-swigsafecast.swig
@@ -37,6 +37,11 @@ PythonObject SWIGBridge::ToSWIGWrapper(lldb::ThreadPlanSP thread_plan_sp) {
SWIGTYPE_p_lldb__SBThreadPlan);
}
+PythonObject SWIGBridge::ToSWIGWrapper(lldb::StackFrameListSP frames_sp) {
+ return ToSWIGHelper(new lldb::SBFrameList(std::move(frames_sp)),
+ SWIGTYPE_p_lldb__SBFrameList);
+}
+
PythonObject SWIGBridge::ToSWIGWrapper(lldb::BreakpointSP breakpoint_sp) {
return ToSWIGHelper(new lldb::SBBreakpoint(std::move(breakpoint_sp)),
SWIGTYPE_p_lldb__SBBreakpoint);
diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig
index e7acba5b95d8..3a0995e84f64 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -556,6 +556,18 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBExecutionContext(PyOb
return sb_ptr;
}
+void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrameList(PyObject *data) {
+ lldb::SBFrameList *sb_ptr = NULL;
+
+ int valid_cast = SWIG_ConvertPtr(data, (void **)&sb_ptr,
+ SWIGTYPE_p_lldb__SBFrameList, 0);
+
+ if (valid_cast == -1)
+ return NULL;
+
+ return sb_ptr;
+}
+
bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallCommand(
const char *python_function_name, const char *session_dictionary_name,
lldb::DebuggerSP debugger, const char *args,
diff --git a/lldb/examples/python/templates/scripted_frame_provider.py b/lldb/examples/python/templates/scripted_frame_provider.py
new file mode 100644
index 000000000000..20f4d76d188c
--- /dev/null
+++ b/lldb/examples/python/templates/scripted_frame_provider.py
@@ -0,0 +1,113 @@
+from abc import ABCMeta, abstractmethod
+
+import lldb
+
+
+class ScriptedFrameProvider(metaclass=ABCMeta):
+ """
+ The base class for a scripted frame provider.
+
+ A scripted frame provider allows you to provide custom stack frames for a
+ thread, which can be used to augment or replace the standard unwinding
+ mechanism. This is useful for:
+
+ - Providing frames for custom calling conventions or languages
+ - Reconstructing missing frames from crash dumps or core files
+ - Adding diagnostic or synthetic frames for debugging
+ - Visualizing state machines or async execution contexts
+
+ Most of the base class methods are `@abstractmethod` that need to be
+ overwritten by the inheriting class.
+
+ Example usage:
+
+ .. code-block:: python
+
+ # Attach a frame provider to a thread
+ thread = process.GetSelectedThread()
+ error = thread.SetScriptedFrameProvider(
+ "my_module.MyFrameProvider",
+ lldb.SBStructuredData()
+ )
+ """
+
+ @abstractmethod
+ def __init__(self, input_frames, args):
+ """Construct a scripted frame provider.
+
+ Args:
+ input_frames (lldb.SBFrameList): The frame list to use as input.
+ This allows you to access frames by index. The frames are
+ materialized lazily as you access them.
+ args (lldb.SBStructuredData): A Dictionary holding arbitrary
+ key/value pairs used by the scripted frame provider.
+ """
+ self.input_frames = None
+ self.args = None
+ self.thread = None
+ self.target = None
+ self.process = None
+
+ if isinstance(input_frames, lldb.SBFrameList) and input_frames.IsValid():
+ self.input_frames = input_frames
+ self.thread = input_frames.GetThread()
+ if self.thread and self.thread.IsValid():
+ self.process = self.thread.GetProcess()
+ if self.process and self.process.IsValid():
+ self.target = self.process.GetTarget()
+
+ if isinstance(args, lldb.SBStructuredData) and args.IsValid():
+ self.args = args
+
+ @abstractmethod
+ def get_frame_at_index(self, index):
+ """Get a single stack frame at the given index.
+
+ This method is called lazily when a specific frame is needed in the
+ thread's backtrace (e.g., via the 'bt' command). Each frame is
+ requested individually as needed.
+
+ Args:
+ index (int): The frame index to retrieve (0 for youngest/top frame).
+
+ Returns:
+ Dict or None: A frame dictionary describing the stack frame, or None
+ if no frame exists at this index. The dictionary should contain:
+
+ Required fields:
+ - idx (int): The synthetic frame index (0 for youngest/top frame)
+ - pc (int): The program counter address for the synthetic frame
+
+ Alternatively, you can return:
+ - A ScriptedFrame object for full control over frame behavior
+ - An integer representing an input frame index to reuse
+ - None to indicate no more frames exist
+
+ Example:
+
+ .. code-block:: python
+
+ def get_frame_at_index(self, index):
+ # Return None when there are no more frames
+ if index >= self.total_frames:
+ return None
+
+ # Re-use an input frame by returning its index
+ if self.should_use_input_frame(index):
+ return index # Returns input frame at this index
+
+ # Or create a custom frame dictionary
+ if index == 0:
+ return {
+ "idx": 0,
+ "pc": 0x100001234,
+ }
+
+ return None
+
+ Note:
+ The frames are indexed from 0 (youngest/top) to N (oldest/bottom).
+ This method will be called repeatedly with increasing indices until
+ None is returned.
+ """
+ pass
diff --git a/lldb/include/lldb/API/SBFrameList.h b/lldb/include/lldb/API/SBFrameList.h
index dba1c1de5d19..0039ffb1f863 100644
--- a/lldb/include/lldb/API/SBFrameList.h
+++ b/lldb/include/lldb/API/SBFrameList.h
@@ -11,6 +11,16 @@
#include "lldb/API/SBDefines.h"
+namespace lldb_private {
+class ScriptInterpreter;
+namespace python {
+class SWIGBridge;
+}
+namespace lua {
+class SWIGBridge;
+}
+} // namespace lldb_private
+
namespace lldb {
/// Represents a list of SBFrame objects.
@@ -66,6 +76,10 @@ public:
protected:
friend class SBThread;
+ friend class lldb_private::python::SWIGBridge;
+ friend class lldb_private::lua::SWIGBridge;
+ friend class lldb_private::ScriptInterpreter;
+
private:
SBFrameList(const lldb::StackFrameListSP &frame_list_sp);
diff --git a/lldb/include/lldb/API/SBModuleSpec.h b/lldb/include/lldb/API/SBModuleSpec.h
index 8d1ecfe6e6f8..b80a52b7a235 100644
--- a/lldb/include/lldb/API/SBModuleSpec.h
+++ b/lldb/include/lldb/API/SBModuleSpec.h
@@ -87,6 +87,16 @@ public:
bool GetDescription(lldb::SBStream &description);
+ lldb::SBTarget GetTarget();
+
+ /// Set the target to be used when resolving a module.
+ ///
+ /// A target can help locate a module specified by a SBModuleSpec. The
+ /// target settings, like the executable and debug info search paths, can
+ /// be essential. The target's platform can also be used to locate or download
+ /// the specified module.
+ void SetTarget(lldb::SBTarget target);
+
private:
friend class SBModuleSpecList;
friend class SBModule;
diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h
index 173fd05b54a1..379a0bb7e951 100644
--- a/lldb/include/lldb/API/SBTarget.h
+++ b/lldb/include/lldb/API/SBTarget.h
@@ -999,6 +999,7 @@ protected:
friend class SBFunction;
friend class SBInstruction;
friend class SBModule;
+ friend class SBModuleSpec;
friend class SBPlatform;
friend class SBProcess;
friend class SBSection;
diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h
index e71f3b2bad6b..df473dff091f 100644
--- a/lldb/include/lldb/Core/ModuleList.h
+++ b/lldb/include/lldb/Core/ModuleList.h
@@ -476,9 +476,9 @@ public:
static Status
GetSharedModule(const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
- bool *did_create_ptr, bool always_create = false);
+ bool *did_create_ptr, bool always_create = false,
+ bool invoke_locate_callback = true);
static bool RemoveSharedModule(lldb::ModuleSP &module_sp);
diff --git a/lldb/include/lldb/Core/ModuleSpec.h b/lldb/include/lldb/Core/ModuleSpec.h
index 86be0383f8b4..acbc85b48f02 100644
--- a/lldb/include/lldb/Core/ModuleSpec.h
+++ b/lldb/include/lldb/Core/ModuleSpec.h
@@ -16,9 +16,11 @@
#include "lldb/Utility/Iterable.h"
#include "lldb/Utility/Stream.h"
#include "lldb/Utility/UUID.h"
+#include "lldb/lldb-forward.h"
#include "llvm/Support/Chrono.h"
+#include <memory>
#include <mutex>
#include <vector>
@@ -126,6 +128,16 @@ public:
lldb::DataBufferSP GetData() const { return m_data; }
+ lldb::TargetSP GetTargetSP() const { return m_target_wp.lock(); }
+
+ /// Set the target to be used when resolving a module.
+ ///
+ /// A target can help locate a module specified by a ModuleSpec. The target
+ /// settings, like the executable and debug info search paths, can be
+ /// essential. The target's platform can also be used to locate or download
+ /// the specified module.
+ void SetTarget(std::shared_ptr<Target> target) { m_target_wp = target; }
+
void Clear() {
m_file.Clear();
m_platform_file.Clear();
@@ -137,6 +149,7 @@ public:
m_object_size = 0;
m_source_mappings.Clear(false);
m_object_mod_time = llvm::sys::TimePoint<>();
+ m_target_wp.reset();
}
explicit operator bool() const {
@@ -265,6 +278,11 @@ protected:
ArchSpec m_arch;
UUID m_uuid;
ConstString m_object_name;
+ /// The target used when resolving a module. A target can help locate a module
+ /// specified by a ModuleSpec. The target settings, like the executable and
+ /// debug info search paths, can be essential. The target's platform can also
+ /// be used to locate or download the specified module.
+ std::weak_ptr<Target> m_target_wp;
uint64_t m_object_offset = 0;
uint64_t m_object_size = 0;
llvm::sys::TimePoint<> m_object_mod_time;
diff --git a/lldb/include/lldb/Core/PluginManager.h b/lldb/include/lldb/Core/PluginManager.h
index aa60b7c6693c..ab2ca58a88dd 100644
--- a/lldb/include/lldb/Core/PluginManager.h
+++ b/lldb/include/lldb/Core/PluginManager.h
@@ -356,6 +356,24 @@ public:
GetScriptInterpreterForLanguage(lldb::ScriptLanguage script_lang,
Debugger &debugger);
+ // SyntheticFrameProvider
+ static bool
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
+ SyntheticFrameProviderCreateInstance create_native_callback,
+ ScriptedFrameProviderCreateInstance create_scripted_callback);
+
+ static bool
+ UnregisterPlugin(SyntheticFrameProviderCreateInstance create_callback);
+
+ static bool
+ UnregisterPlugin(ScriptedFrameProviderCreateInstance create_callback);
+
+ static SyntheticFrameProviderCreateInstance
+ GetSyntheticFrameProviderCreateCallbackForPluginName(llvm::StringRef name);
+
+ static ScriptedFrameProviderCreateInstance
+ GetScriptedFrameProviderCreateCallbackAtIndex(uint32_t idx);
+
// StructuredDataPlugin
/// Register a StructuredDataPlugin class along with optional
diff --git a/lldb/include/lldb/Core/Section.h b/lldb/include/lldb/Core/Section.h
index f0f5a0b3499c..3c5586c489da 100644
--- a/lldb/include/lldb/Core/Section.h
+++ b/lldb/include/lldb/Core/Section.h
@@ -46,6 +46,8 @@ public:
/// Create an empty list.
SectionList() = default;
+ SectionList(const SectionList &lhs);
+
SectionList &operator=(const SectionList &rhs);
size_t AddSection(const lldb::SectionSP &section_sp);
@@ -96,6 +98,17 @@ public:
/// information.
uint64_t GetDebugInfoSize() const;
+ // Callback to decide which of two matching sections should be used in the
+ // merged output.
+ using MergeCallback =
+ std::function<lldb::SectionSP(lldb::SectionSP, lldb::SectionSP)>;
+
+ // Function that merges two different sections into a new output list. All
+ // unique sections will be checked for conflict and resolved using the
+ // supplied merging callback.
+ static SectionList Merge(SectionList &lhs, SectionList &rhs,
+ MergeCallback filter);
+
protected:
collection m_sections;
};
@@ -273,6 +286,9 @@ public:
/// return true.
bool ContainsOnlyDebugInfo() const;
+ /// Returns true if this is a global offset table section.
+ bool IsGOTSection() const;
+
protected:
ObjectFile *m_obj_file; // The object file that data for this section should
// be read from
diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h
new file mode 100644
index 000000000000..2d9f713676f9
--- /dev/null
+++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDFRAMEPROVIDERINTERFACE_H
+#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDFRAMEPROVIDERINTERFACE_H
+
+#include "lldb/lldb-private.h"
+
+#include "ScriptedInterface.h"
+
+namespace lldb_private {
+class ScriptedFrameProviderInterface : public ScriptedInterface {
+public:
+ virtual llvm::Expected<StructuredData::GenericSP>
+ CreatePluginObject(llvm::StringRef class_name,
+ lldb::StackFrameListSP input_frames,
+ StructuredData::DictionarySP args_sp) = 0;
+
+ virtual StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) {
+ return {};
+ }
+};
+} // namespace lldb_private
+
+#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDFRAMEPROVIDERINTERFACE_H
diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
index edb80dc66aca..7fed4940b85b 100644
--- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h
+++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
@@ -16,6 +16,7 @@
#include "lldb/API/SBError.h"
#include "lldb/API/SBEvent.h"
#include "lldb/API/SBExecutionContext.h"
+#include "lldb/API/SBFrameList.h"
#include "lldb/API/SBLaunchInfo.h"
#include "lldb/API/SBMemoryRegionInfo.h"
#include "lldb/API/SBStream.h"
@@ -28,6 +29,7 @@
#include "lldb/Host/StreamFile.h"
#include "lldb/Interpreter/Interfaces/OperatingSystemInterface.h"
#include "lldb/Interpreter/Interfaces/ScriptedFrameInterface.h"
+#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h"
#include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h"
#include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h"
#include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h"
@@ -537,6 +539,11 @@ public:
return {};
}
+ virtual lldb::ScriptedFrameProviderInterfaceSP
+ CreateScriptedFrameProviderInterface() {
+ return {};
+ }
+
virtual lldb::ScriptedThreadPlanInterfaceSP
CreateScriptedThreadPlanInterface() {
return {};
@@ -596,6 +603,9 @@ public:
lldb::ExecutionContextRefSP GetOpaqueTypeFromSBExecutionContext(
const lldb::SBExecutionContext &exe_ctx) const;
+ lldb::StackFrameListSP
+ GetOpaqueTypeFromSBFrameList(const lldb::SBFrameList &exe_ctx) const;
+
protected:
Debugger &m_debugger;
lldb::ScriptLanguage m_script_lang;
diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h
index 1b9ae1fb31a6..1de08a857650 100644
--- a/lldb/include/lldb/Symbol/ObjectFile.h
+++ b/lldb/include/lldb/Symbol/ObjectFile.h
@@ -758,6 +758,12 @@ public:
return false;
}
+ /// Returns true if the section is a global offset table section.
+ virtual bool IsGOTSection(const lldb_private::Section &section) const {
+ assert(section.GetObjectFile() == this && "Wrong object file!");
+ return false;
+ }
+
/// Get a hash that can be used for caching object file releated information.
///
/// Data for object files can be cached between runs of debug sessions and
diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h
index 35ffdabf907e..1104722f52c7 100644
--- a/lldb/include/lldb/Target/Platform.h
+++ b/lldb/include/lldb/Target/Platform.h
@@ -127,8 +127,7 @@ public:
/// Returns \b true if this Platform plug-in was able to find
/// a suitable executable, \b false otherwise.
virtual Status ResolveExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr);
+ lldb::ModuleSP &exe_module_sp);
/// Find a symbol file given a symbol file module specification.
///
@@ -304,10 +303,11 @@ public:
/// \return
/// The Status object for any errors found while searching for
/// the binary.
- virtual Status GetSharedModule(
- const ModuleSpec &module_spec, Process *process,
- lldb::ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr,
- llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
+ virtual Status
+ GetSharedModule(const ModuleSpec &module_spec, Process *process,
+ lldb::ModuleSP &module_sp,
+ llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
+ bool *did_create_ptr);
void CallLocateModuleCallbackIfSet(const ModuleSpec &module_spec,
lldb::ModuleSP &module_sp,
@@ -1039,8 +1039,8 @@ protected:
/// predefined trap handlers, this method may be a no-op.
virtual void CalculateTrapHandlerSymbolNames() = 0;
- Status GetCachedExecutable(ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr);
+ Status GetCachedExecutable(ModuleSpec &module_spec,
+ lldb::ModuleSP &module_sp);
virtual Status DownloadModuleSlice(const FileSpec &src_file_spec,
const uint64_t src_offset,
diff --git a/lldb/include/lldb/Target/RemoteAwarePlatform.h b/lldb/include/lldb/Target/RemoteAwarePlatform.h
index fb2eecfaa23a..de13b18f30d8 100644
--- a/lldb/include/lldb/Target/RemoteAwarePlatform.h
+++ b/lldb/include/lldb/Target/RemoteAwarePlatform.h
@@ -20,10 +20,8 @@ class RemoteAwarePlatform : public Platform {
public:
using Platform::Platform;
- virtual Status
- ResolveExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr) override;
+ virtual Status ResolveExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp) override;
bool GetModuleSpec(const FileSpec &module_file_spec, const ArchSpec &arch,
ModuleSpec &module_spec) override;
diff --git a/lldb/include/lldb/Target/SyntheticFrameProvider.h b/lldb/include/lldb/Target/SyntheticFrameProvider.h
new file mode 100644
index 000000000000..61a492f356ec
--- /dev/null
+++ b/lldb/include/lldb/Target/SyntheticFrameProvider.h
@@ -0,0 +1,156 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_SYNTHETICFRAMEPROVIDER_H
+#define LLDB_TARGET_SYNTHETICFRAMEPROVIDER_H
+
+#include "lldb/Core/PluginInterface.h"
+#include "lldb/Target/StackFrameList.h"
+#include "lldb/Target/ThreadSpec.h"
+#include "lldb/Utility/ScriptedMetadata.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/lldb-forward.h"
+#include "llvm/Support/Error.h"
+
+#include <optional>
+#include <vector>
+
+namespace lldb_private {
+
+/// This struct contains the metadata needed to instantiate a frame provider
+/// and optional filters to control which threads it applies to.
+struct SyntheticFrameProviderDescriptor {
+ /// Metadata for instantiating the provider (e.g. script class name and args).
+ lldb::ScriptedMetadataSP scripted_metadata_sp;
+
+ /// Optional list of thread specifications to which this provider applies.
+ /// If empty, the provider applies to all threads. A thread matches if it
+ /// satisfies ANY of the specs in this vector (OR logic).
+ std::vector<ThreadSpec> thread_specs;
+
+ SyntheticFrameProviderDescriptor() = default;
+
+ SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp)
+ : scripted_metadata_sp(metadata_sp) {}
+
+ SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp,
+ const std::vector<ThreadSpec> &specs)
+ : scripted_metadata_sp(metadata_sp), thread_specs(specs) {}
+
+ /// Get the name of this descriptor (the scripted class name).
+ llvm::StringRef GetName() const {
+ return scripted_metadata_sp ? scripted_metadata_sp->GetClassName() : "";
+ }
+
+ /// Check if this descriptor applies to the given thread.
+ bool AppliesToThread(Thread &thread) const {
+ // If no thread specs specified, applies to all threads.
+ if (thread_specs.empty())
+ return true;
+
+ // Check if the thread matches any of the specs (OR logic).
+ for (const auto &spec : thread_specs) {
+ if (spec.ThreadPassesBasicTests(thread))
+ return true;
+ }
+ return false;
+ }
+
+ /// Check if this descriptor has valid metadata for script-based providers.
+ bool IsValid() const { return scripted_metadata_sp != nullptr; }
+
+ void Dump(Stream *s) const;
+};
+
+/// Base class for all synthetic frame providers.
+///
+/// Synthetic frame providers allow modifying or replacing the stack frames
+/// shown for a thread. This is useful for:
+/// - Providing frames for custom calling conventions or languages.
+/// - Reconstructing missing frames from crash dumps or core files.
+/// - Adding diagnostic or synthetic frames for debugging.
+/// - Visualizing state machines or async execution contexts.
+class SyntheticFrameProvider : public PluginInterface {
+public:
+ /// Try to create a SyntheticFrameProvider instance for the given input
+ /// frames and descriptor.
+ ///
+ /// This method iterates through all registered SyntheticFrameProvider
+ /// plugins and returns the first one that can handle the given descriptor.
+ ///
+ /// \param[in] input_frames
+ /// The input stack frame list that this provider will transform.
+ /// This could be real unwound frames or output from another provider.
+ ///
+ /// \param[in] descriptor
+ /// The descriptor containing metadata for the provider.
+ ///
+ /// \return
+ /// A shared pointer to a SyntheticFrameProvider if one could be created,
+ /// otherwise an \a llvm::Error.
+ static llvm::Expected<lldb::SyntheticFrameProviderSP>
+ CreateInstance(lldb::StackFrameListSP input_frames,
+ const SyntheticFrameProviderDescriptor &descriptor);
+
+ /// Try to create a SyntheticFrameProvider instance for the given input
+ /// frames using a specific C++ plugin.
+ ///
+ /// This method directly invokes a specific SyntheticFrameProvider plugin
+ /// by name, bypassing the descriptor-based plugin iteration. This is useful
+ /// for C++ plugins that don't require scripted metadata.
+ ///
+ /// \param[in] input_frames
+ /// The input stack frame list that this provider will transform.
+ /// This could be real unwound frames or output from another provider.
+ ///
+ /// \param[in] plugin_name
+ /// The name of the plugin to use for creating the provider.
+ ///
+ /// \param[in] thread_specs
+ /// Optional list of thread specifications to which this provider applies.
+ /// If empty, the provider applies to all threads.
+ ///
+ /// \return
+ /// A shared pointer to a SyntheticFrameProvider if one could be created,
+ /// otherwise an \a llvm::Error.
+ static llvm::Expected<lldb::SyntheticFrameProviderSP>
+ CreateInstance(lldb::StackFrameListSP input_frames,
+ llvm::StringRef plugin_name,
+ const std::vector<ThreadSpec> &thread_specs = {});
+
+ ~SyntheticFrameProvider() override;
+
+ /// Get a single stack frame at the specified index.
+ ///
+ /// This method is called lazily - frames are only created when requested.
+ /// The provider can access its input frames via GetInputFrames() if needed.
+ ///
+ /// \param[in] idx
+ /// The index of the frame to create.
+ ///
+ /// \return
+ /// An Expected containing the StackFrameSP if successful. Returns an
+ /// error when the index is beyond the last frame to signal the end of
+ /// the frame list.
+ virtual llvm::Expected<lldb::StackFrameSP> GetFrameAtIndex(uint32_t idx) = 0;
+
+ /// Get the thread associated with this provider.
+ Thread &GetThread() { return m_input_frames->GetThread(); }
+
+ /// Get the input frames that this provider transforms.
+ lldb::StackFrameListSP GetInputFrames() const { return m_input_frames; }
+
+protected:
+ SyntheticFrameProvider(lldb::StackFrameListSP input_frames);
+
+ lldb::StackFrameListSP m_input_frames;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_SYNTHETICFRAMEPROVIDER_H
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h
index af5656b3dcad..8b8d081ca211 100644
--- a/lldb/include/lldb/lldb-forward.h
+++ b/lldb/include/lldb/lldb-forward.h
@@ -188,6 +188,7 @@ class Scalar;
class ScriptInterpreter;
class ScriptInterpreterLocker;
class ScriptedFrameInterface;
+class ScriptedFrameProviderInterface;
class ScriptedMetadata;
class ScriptedBreakpointInterface;
class ScriptedPlatformInterface;
@@ -235,6 +236,7 @@ class SymbolVendor;
class Symtab;
class SyntheticChildren;
class SyntheticChildrenFrontEnd;
+class SyntheticFrameProvider;
class SystemRuntime;
class Progress;
class Target;
@@ -411,6 +413,10 @@ typedef std::shared_ptr<lldb_private::ScriptSummaryFormat>
typedef std::shared_ptr<lldb_private::ScriptInterpreter> ScriptInterpreterSP;
typedef std::shared_ptr<lldb_private::ScriptedFrameInterface>
ScriptedFrameInterfaceSP;
+typedef std::shared_ptr<lldb_private::ScriptedFrameProviderInterface>
+ ScriptedFrameProviderInterfaceSP;
+typedef std::shared_ptr<lldb_private::SyntheticFrameProvider>
+ SyntheticFrameProviderSP;
typedef std::shared_ptr<lldb_private::ScriptedMetadata> ScriptedMetadataSP;
typedef std::unique_ptr<lldb_private::ScriptedPlatformInterface>
ScriptedPlatformInterfaceUP;
diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h
index 249b25c251ac..2fe3af7c62e0 100644
--- a/lldb/include/lldb/lldb-private-interfaces.h
+++ b/lldb/include/lldb/lldb-private-interfaces.h
@@ -25,6 +25,7 @@ class Value;
namespace lldb_private {
class ScriptedInterfaceUsages;
+struct SyntheticFrameProviderDescriptor;
typedef lldb::ABISP (*ABICreateInstance)(lldb::ProcessSP process_sp,
const ArchSpec &arch);
typedef std::unique_ptr<Architecture> (*ArchitectureCreateInstance)(
@@ -86,6 +87,14 @@ typedef lldb::RegisterTypeBuilderSP (*RegisterTypeBuilderCreateInstance)(
Target &target);
typedef lldb::ScriptInterpreterSP (*ScriptInterpreterCreateInstance)(
Debugger &debugger);
+typedef llvm::Expected<lldb::SyntheticFrameProviderSP> (
+ *ScriptedFrameProviderCreateInstance)(
+ lldb::StackFrameListSP input_frames,
+ const lldb_private::SyntheticFrameProviderDescriptor &descriptor);
+typedef llvm::Expected<lldb::SyntheticFrameProviderSP> (
+ *SyntheticFrameProviderCreateInstance)(
+ lldb::StackFrameListSP input_frames,
+ const std::vector<lldb_private::ThreadSpec> &thread_specs);
typedef SymbolFile *(*SymbolFileCreateInstance)(lldb::ObjectFileSP objfile_sp);
typedef SymbolVendor *(*SymbolVendorCreateInstance)(
const lldb::ModuleSP &module_sp,
diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
index 63a35224b043..0122fe8409c2 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
@@ -294,6 +294,11 @@ ifeq "$(MAKE_DEBUG_NAMES)" "YES"
CFLAGS += -gpubnames
endif
+# Enable GNU POSIX extensions (e.g. kill(), usleep(), getpgid(), ...)
+ifeq "$(OS)" "Linux"
+ CFLAGS += -D_DEFAULT_SOURCE
+endif
+
ifeq "$(USE_PRIVATE_MODULE_CACHE)" "YES"
THE_CLANG_MODULE_CACHE_DIR := $(BUILDDIR)/private-module-cache
else
diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp
index 5a57f45f0d47..32067ac1c650 100644
--- a/lldb/source/API/SBModule.cpp
+++ b/lldb/source/API/SBModule.cpp
@@ -37,8 +37,8 @@ SBModule::SBModule(const SBModuleSpec &module_spec) {
LLDB_INSTRUMENT_VA(this, module_spec);
ModuleSP module_sp;
- Status error = ModuleList::GetSharedModule(
- *module_spec.m_opaque_up, module_sp, nullptr, nullptr, nullptr);
+ Status error = ModuleList::GetSharedModule(*module_spec.m_opaque_up,
+ module_sp, nullptr, nullptr);
if (module_sp)
SetSP(module_sp);
}
diff --git a/lldb/source/API/SBModuleSpec.cpp b/lldb/source/API/SBModuleSpec.cpp
index fbbcfeac2017..031ba1256d18 100644
--- a/lldb/source/API/SBModuleSpec.cpp
+++ b/lldb/source/API/SBModuleSpec.cpp
@@ -9,6 +9,7 @@
#include "lldb/API/SBModuleSpec.h"
#include "Utils.h"
#include "lldb/API/SBStream.h"
+#include "lldb/API/SBTarget.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Host/Host.h"
@@ -174,6 +175,18 @@ void SBModuleSpec::SetObjectSize(uint64_t object_size) {
m_opaque_up->SetObjectSize(object_size);
}
+SBTarget SBModuleSpec::GetTarget() {
+ LLDB_INSTRUMENT_VA(this);
+
+ return SBTarget(m_opaque_up->GetTargetSP());
+}
+
+void SBModuleSpec::SetTarget(SBTarget target) {
+ LLDB_INSTRUMENT_VA(this, target);
+
+ m_opaque_up->SetTarget(target.GetSP());
+}
+
SBModuleSpecList::SBModuleSpecList() : m_opaque_up(new ModuleSpecList()) {
LLDB_INSTRUMENT_VA(this);
}
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index 8de6521e65b2..30bca639060e 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -5121,6 +5121,15 @@ public:
: CommandObjectParsed(interpreter, "target stop-hook delete",
"Delete a stop-hook.",
"target stop-hook delete [<idx>]") {
+ SetHelpLong(
+ R"(
+Deletes the stop hook by index.
+
+At any given stop, all enabled stop hooks that pass the stop filter will
+get a chance to run. That means if one stop-hook deletes another stop hook
+while executing, the deleted stop hook will still fire for the stop at which
+it was deleted.
+ )");
AddSimpleArgumentList(eArgTypeStopHookID, eArgRepeatStar);
}
diff --git a/lldb/source/Core/DemangledNameInfo.cpp b/lldb/source/Core/DemangledNameInfo.cpp
index 76f8987c5149..16fbfda299b2 100644
--- a/lldb/source/Core/DemangledNameInfo.cpp
+++ b/lldb/source/Core/DemangledNameInfo.cpp
@@ -16,7 +16,7 @@ bool TrackingOutputBuffer::shouldTrack() const {
if (!isPrintingTopLevelFunctionType())
return false;
- if (isGtInsideTemplateArgs())
+ if (isInsideTemplateArgs())
return false;
if (NameInfo.ArgumentsRange.first > 0)
@@ -29,7 +29,7 @@ bool TrackingOutputBuffer::canFinalize() const {
if (!isPrintingTopLevelFunctionType())
return false;
- if (isGtInsideTemplateArgs())
+ if (isInsideTemplateArgs())
return false;
if (NameInfo.ArgumentsRange.first == 0)
diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp
index 7580b15c02ce..b309e0f0a72f 100644
--- a/lldb/source/Core/DynamicLoader.cpp
+++ b/lldb/source/Core/DynamicLoader.cpp
@@ -227,6 +227,7 @@ ModuleSP DynamicLoader::LoadBinaryWithUUIDAndAddress(
}
}
ModuleSpec module_spec;
+ module_spec.SetTarget(target.shared_from_this());
module_spec.GetUUID() = uuid;
FileSpec name_filespec(name);
if (FileSystem::Instance().Exists(name_filespec))
@@ -238,8 +239,8 @@ ModuleSP DynamicLoader::LoadBinaryWithUUIDAndAddress(
// Has lldb already seen a module with this UUID?
// Or have external lookup enabled in DebugSymbols on macOS.
if (!module_sp)
- error = ModuleList::GetSharedModule(module_spec, module_sp, nullptr,
- nullptr, nullptr);
+ error =
+ ModuleList::GetSharedModule(module_spec, module_sp, nullptr, nullptr);
// Can lldb's symbol/executable location schemes
// find an executable and symbol file.
diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp
index c40612c1ced5..d9f845681e70 100644
--- a/lldb/source/Core/ModuleList.cpp
+++ b/lldb/source/Core/ModuleList.cpp
@@ -19,6 +19,8 @@
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Symbol/TypeList.h"
#include "lldb/Symbol/VariableList.h"
+#include "lldb/Target/Platform.h"
+#include "lldb/Target/Target.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/FileSpecList.h"
@@ -1038,9 +1040,9 @@ size_t ModuleList::RemoveOrphanSharedModules(bool mandatory) {
Status
ModuleList::GetSharedModule(const ModuleSpec &module_spec, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
- bool *did_create_ptr, bool always_create) {
+ bool *did_create_ptr, bool always_create,
+ bool invoke_locate_callback) {
SharedModuleList &shared_module_list = GetSharedModuleList();
std::lock_guard<std::recursive_mutex> guard(shared_module_list.GetMutex());
char path[PATH_MAX];
@@ -1095,6 +1097,22 @@ ModuleList::GetSharedModule(const ModuleSpec &module_spec, ModuleSP &module_sp,
if (module_sp)
return error;
+ // Try target's platform locate module callback before second attempt.
+ if (invoke_locate_callback) {
+ TargetSP target_sp = module_spec.GetTargetSP();
+ if (target_sp && target_sp->IsValid()) {
+ if (PlatformSP platform_sp = target_sp->GetPlatform()) {
+ FileSpec symbol_file_spec;
+ platform_sp->CallLocateModuleCallbackIfSet(
+ module_spec, module_sp, symbol_file_spec, did_create_ptr);
+ if (module_sp) {
+ // The callback found a module.
+ return error;
+ }
+ }
+ }
+ }
+
module_sp = std::make_shared<Module>(module_spec);
// Make sure there are a module and an object file since we can specify a
// valid file path with an architecture that might not be in that file. By
@@ -1122,10 +1140,16 @@ ModuleList::GetSharedModule(const ModuleSpec &module_spec, ModuleSP &module_sp,
module_sp.reset();
}
- if (module_search_paths_ptr) {
- const auto num_directories = module_search_paths_ptr->GetSize();
+ // Get module search paths from the target if available.
+ lldb::TargetSP target_sp = module_spec.GetTargetSP();
+ FileSpecList module_search_paths;
+ if (target_sp)
+ module_search_paths = target_sp->GetExecutableSearchPaths();
+
+ if (!module_search_paths.IsEmpty()) {
+ const auto num_directories = module_search_paths.GetSize();
for (size_t idx = 0; idx < num_directories; ++idx) {
- auto search_path_spec = module_search_paths_ptr->GetFileSpecAtIndex(idx);
+ auto search_path_spec = module_search_paths.GetFileSpecAtIndex(idx);
FileSystem::Instance().Resolve(search_path_spec);
namespace fs = llvm::sys::fs;
if (!FileSystem::Instance().IsDirectory(search_path_spec))
diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp
index 588736715f81..4e3563cf419f 100644
--- a/lldb/source/Core/PluginManager.cpp
+++ b/lldb/source/Core/PluginManager.cpp
@@ -1300,6 +1300,61 @@ PluginManager::GetScriptInterpreterForLanguage(lldb::ScriptLanguage script_lang,
return none_instance(debugger);
}
+#pragma mark SyntheticFrameProvider
+
+typedef PluginInstance<SyntheticFrameProviderCreateInstance>
+ SyntheticFrameProviderInstance;
+typedef PluginInstance<ScriptedFrameProviderCreateInstance>
+ ScriptedFrameProviderInstance;
+typedef PluginInstances<SyntheticFrameProviderInstance>
+ SyntheticFrameProviderInstances;
+typedef PluginInstances<ScriptedFrameProviderInstance>
+ ScriptedFrameProviderInstances;
+
+static SyntheticFrameProviderInstances &GetSyntheticFrameProviderInstances() {
+ static SyntheticFrameProviderInstances g_instances;
+ return g_instances;
+}
+
+static ScriptedFrameProviderInstances &GetScriptedFrameProviderInstances() {
+ static ScriptedFrameProviderInstances g_instances;
+ return g_instances;
+}
+
+bool PluginManager::RegisterPlugin(
+ llvm::StringRef name, llvm::StringRef description,
+ SyntheticFrameProviderCreateInstance create_native_callback,
+ ScriptedFrameProviderCreateInstance create_scripted_callback) {
+ if (create_native_callback)
+ return GetSyntheticFrameProviderInstances().RegisterPlugin(
+ name, description, create_native_callback);
+ else if (create_scripted_callback)
+ return GetScriptedFrameProviderInstances().RegisterPlugin(
+ name, description, create_scripted_callback);
+ return false;
+}
+
+bool PluginManager::UnregisterPlugin(
+ SyntheticFrameProviderCreateInstance create_callback) {
+ return GetSyntheticFrameProviderInstances().UnregisterPlugin(create_callback);
+}
+
+bool PluginManager::UnregisterPlugin(
+ ScriptedFrameProviderCreateInstance create_callback) {
+ return GetScriptedFrameProviderInstances().UnregisterPlugin(create_callback);
+}
+
+SyntheticFrameProviderCreateInstance
+PluginManager::GetSyntheticFrameProviderCreateCallbackForPluginName(
+ llvm::StringRef name) {
+ return GetSyntheticFrameProviderInstances().GetCallbackForName(name);
+}
+
+ScriptedFrameProviderCreateInstance
+PluginManager::GetScriptedFrameProviderCreateCallbackAtIndex(uint32_t idx) {
+ return GetScriptedFrameProviderInstances().GetCallbackAtIndex(idx);
+}
+
#pragma mark StructuredDataPlugin
struct StructuredDataPluginInstance
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index 02d9d86fe537..f16035b5649e 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -471,8 +471,14 @@ bool Section::ContainsOnlyDebugInfo() const {
return false;
}
+bool Section::IsGOTSection() const {
+ return GetObjectFile()->IsGOTSection(*this);
+}
+
#pragma mark SectionList
+SectionList::SectionList(const SectionList &rhs) : m_sections(rhs.m_sections) {}
+
SectionList &SectionList::operator=(const SectionList &rhs) {
if (this != &rhs)
m_sections = rhs.m_sections;
@@ -683,6 +689,33 @@ uint64_t SectionList::GetDebugInfoSize() const {
return debug_info_size;
}
+SectionList SectionList::Merge(SectionList &lhs, SectionList &rhs,
+ MergeCallback filter) {
+ SectionList output_list;
+
+ // Iterate through all the sections in lhs and see if we have matches in
+ // the rhs list.
+ for (const auto &lhs_section : lhs) {
+ auto rhs_section = rhs.FindSectionByName(lhs_section->GetName());
+ if (rhs_section)
+ output_list.AddSection(filter(lhs_section, rhs_section));
+ else
+ output_list.AddSection(lhs_section);
+ }
+
+ // Now that we've visited all possible duplicates, we can iterate over
+ // the rhs and take any values not in lhs.
+ for (const auto &rhs_section : rhs) {
+ auto lhs_section = lhs.FindSectionByName(rhs_section->GetName());
+ // Because we already visited everything overlapping between rhs
+ // and lhs, any section not in lhs is unique and can be output.
+ if (!lhs_section)
+ output_list.AddSection(rhs_section);
+ }
+
+ return output_list;
+}
+
namespace llvm {
namespace json {
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 1b1922e71076..e2995b37429f 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -1626,6 +1626,9 @@ bool Editline::GetLine(std::string &line, bool &interrupted) {
m_editor_status = EditorStatus::Editing;
m_revert_cursor_index = -1;
+ lldbassert(m_output_stream_sp);
+ fprintf(m_locked_output->GetFile().GetStream(), "\r" ANSI_CLEAR_RIGHT);
+
int count;
auto input = el_wgets(m_editline, &count);
diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp
index ca768db1199c..211868b51fac 100644
--- a/lldb/source/Interpreter/ScriptInterpreter.cpp
+++ b/lldb/source/Interpreter/ScriptInterpreter.cpp
@@ -150,6 +150,11 @@ ScriptInterpreter::GetOpaqueTypeFromSBExecutionContext(
return exe_ctx.m_exe_ctx_sp;
}
+lldb::StackFrameListSP ScriptInterpreter::GetOpaqueTypeFromSBFrameList(
+ const lldb::SBFrameList &frame_list) const {
+ return frame_list.m_opaque_sp;
+}
+
lldb::ScriptLanguage
ScriptInterpreter::StringToLanguage(const llvm::StringRef &language) {
if (language.equals_insensitive(LanguageToString(eScriptLanguageNone)))
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index 1d210ea78df1..2d0a4f67499e 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -789,6 +789,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
// Search for the kext on the local filesystem via the UUID
if (!m_module_sp && m_uuid.IsValid()) {
ModuleSpec module_spec;
+ module_spec.SetTarget(target.shared_from_this());
module_spec.GetUUID() = m_uuid;
if (!m_uuid.IsValid())
module_spec.GetArchitecture() = target.GetArchitecture();
@@ -801,9 +802,8 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
// system.
PlatformSP platform_sp(target.GetPlatform());
if (platform_sp) {
- FileSpecList search_paths = target.GetExecutableSearchPaths();
- platform_sp->GetSharedModule(module_spec, process, m_module_sp,
- &search_paths, nullptr, nullptr);
+ platform_sp->GetSharedModule(module_spec, process, m_module_sp, nullptr,
+ nullptr);
}
// Ask the Target to find this file on the local system, if possible.
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
index 326b6910b526..470fc2a2fdbb 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
@@ -901,10 +901,9 @@ void DynamicLoaderPOSIXDYLD::ResolveExecutableModule(
if (module_sp && module_sp->MatchesModuleSpec(module_spec))
return;
+ module_spec.SetTarget(target.shared_from_this());
const auto executable_search_paths(Target::GetDefaultExecutableSearchPaths());
- auto error = platform_sp->ResolveExecutable(
- module_spec, module_sp,
- !executable_search_paths.IsEmpty() ? &executable_search_paths : nullptr);
+ auto error = platform_sp->ResolveExecutable(module_spec, module_sp);
if (error.Fail()) {
StreamString stream;
module_spec.Dump(stream);
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
index ad4d060319e3..debf4761175b 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h
@@ -41,11 +41,11 @@ public:
/// The path to the exact module to be loaded. E.g., if the desired
/// module is std.io, then this should be { "std", "io" }.
///
- /// \param[in] exported_modules
+ /// \param[out] exported_modules
/// If non-NULL, a pointer to a vector to populate with the ID of every
/// module that is re-exported by the specified module.
///
- /// \param[in] error_stream
+ /// \param[out] error_stream
/// A stream to populate with the output of the Clang parser when
/// it tries to load the module.
///
@@ -63,11 +63,11 @@ public:
/// \param[in] cu
/// The compilation unit to scan for imported modules.
///
- /// \param[in] exported_modules
+ /// \param[out] exported_modules
/// A vector to populate with the ID of each module loaded (directly
/// and via re-exports) in this way.
///
- /// \param[in] error_stream
+ /// \param[out] error_stream
/// A stream to populate with the output of the Clang parser when
/// it tries to load the modules.
///
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index e06e69fb0830..3968715a6d21 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -130,6 +130,29 @@ private:
RelocUnion reloc;
};
+
+lldb::SectionSP MergeSections(lldb::SectionSP lhs, lldb::SectionSP rhs) {
+ assert(lhs && rhs);
+
+ lldb::ModuleSP lhs_module_parent = lhs->GetModule();
+ lldb::ModuleSP rhs_module_parent = rhs->GetModule();
+ assert(lhs_module_parent && rhs_module_parent);
+
+ // Do a sanity check, these should be the same.
+ if (lhs->GetFileAddress() != rhs->GetFileAddress())
+ lhs_module_parent->ReportWarning(
+ "Mismatch addresses for section {0} when "
+ "merging with {1}, expected: {2:x}, "
+ "actual: {3:x}",
+ lhs->GetTypeAsCString(),
+ rhs_module_parent->GetFileSpec().GetPathAsConstString().GetCString(),
+ lhs->GetByteSize(), rhs->GetByteSize());
+
+ // We want to take the greater of two sections. If LHS and RHS are both
+ // SHT_NOBITS, we should default to LHS. If RHS has a bigger section,
+ // indicating it has data that wasn't stripped, we should take that instead.
+ return rhs->GetFileSize() > lhs->GetFileSize() ? rhs : lhs;
+}
} // end anonymous namespace
ELFRelocation::ELFRelocation(unsigned type) {
@@ -1967,10 +1990,10 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) {
provider.AddSection(std::move(*InfoOr), std::move(section_sp));
}
- // For eTypeDebugInfo files, the Symbol Vendor will take care of updating the
- // unified section list.
- if (GetType() != eTypeDebugInfo)
- unified_section_list = *m_sections_up;
+ // Merge the two adding any new sections, and overwriting any existing
+ // sections that are SHT_NOBITS
+ unified_section_list =
+ SectionList::Merge(unified_section_list, *m_sections_up, MergeSections);
// If there's a .gnu_debugdata section, we'll try to read the .symtab that's
// embedded in there and replace the one in the original object file (if any).
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index c8e520d687f6..2218c23db5a9 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -5936,6 +5936,20 @@ Section *ObjectFileMachO::GetMachHeaderSection() {
return nullptr;
}
+bool ObjectFileMachO::IsGOTSection(const lldb_private::Section &section) const {
+ assert(section.GetObjectFile() == this && "Wrong object file!");
+ SectionSP segment = section.GetParent();
+ if (!segment)
+ return false;
+
+ const bool is_data_const_got =
+ segment->GetName() == "__DATA_CONST" && section.GetName() == "__got";
+ const bool is_auth_const_ptr =
+ segment->GetName() == "__AUTH_CONST" &&
+ (section.GetName() == "__auth_got" || section.GetName() == "__auth_ptr");
+ return is_data_const_got || is_auth_const_ptr;
+}
+
bool ObjectFileMachO::SectionIsLoadable(const Section *section) {
if (!section)
return false;
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
index 25643aacb3d2..5456f0315c94 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h
@@ -162,6 +162,8 @@ public:
lldb_private::Section *GetMachHeaderSection();
+ bool IsGOTSection(const lldb_private::Section &section) const override;
+
// PluginInterface protocol
llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp
index 4cfb0a81dc6e..47111c97927c 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp
@@ -90,7 +90,7 @@ void PlatformAppleSimulator::GetStatus(Stream &strm) {
if (!sdk.empty())
strm << " SDK Path: \"" << sdk << "\"\n";
else
- strm << " SDK Path: error: unable to locate SDK\n";
+ strm << " SDK Path: <unable to locate SDK>\n";
#if defined(__APPLE__)
// This will get called by subclasses, so just output status on the current
@@ -420,7 +420,6 @@ Status PlatformAppleSimulator::GetSymbolFile(const FileSpec &platform_file,
Status PlatformAppleSimulator::GetSharedModule(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
// For iOS/tvOS/watchOS, the SDK files are all cached locally on the
// host system. So first we ask for the file in the cached SDK, then
@@ -432,12 +431,10 @@ Status PlatformAppleSimulator::GetSharedModule(
error = GetSymbolFile(platform_file, module_spec.GetUUIDPtr(),
platform_module_spec.GetFileSpec());
if (error.Success()) {
- error = ResolveExecutable(platform_module_spec, module_sp,
- module_search_paths_ptr);
+ error = ResolveExecutable(platform_module_spec, module_sp);
} else {
const bool always_create = false;
- error = ModuleList::GetSharedModule(module_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ error = ModuleList::GetSharedModule(module_spec, module_sp, old_modules,
did_create_ptr, always_create);
}
if (module_sp)
@@ -660,4 +657,3 @@ void PlatformAppleSimulator::Terminate() {
PlatformDarwin::Terminate();
}
}
-
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h
index 7fcf2c502ca6..77d2a3b4e1cc 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h
@@ -89,7 +89,6 @@ public:
Status GetSharedModule(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr) override;
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index 5aad4470091b..8b4a3e0a7c3f 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -331,7 +331,6 @@ Status PlatformDarwin::ResolveSymbolFile(Target &target,
Status PlatformDarwin::GetSharedModule(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
Status error;
module_sp.reset();
@@ -341,19 +340,22 @@ Status PlatformDarwin::GetSharedModule(
// module first.
if (m_remote_platform_sp) {
error = m_remote_platform_sp->GetSharedModule(
- module_spec, process, module_sp, module_search_paths_ptr, old_modules,
- did_create_ptr);
+ module_spec, process, module_sp, old_modules, did_create_ptr);
}
}
if (!module_sp) {
// Fall back to the local platform and find the file locally
error = Platform::GetSharedModule(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
const FileSpec &platform_file = module_spec.GetFileSpec();
- if (!module_sp && module_search_paths_ptr && platform_file) {
+ // Get module search paths from the target if available.
+ TargetSP target_sp = module_spec.GetTargetSP();
+ FileSpecList module_search_paths;
+ if (target_sp)
+ module_search_paths = target_sp->GetExecutableSearchPaths();
+ if (!module_sp && !module_search_paths.IsEmpty() && platform_file) {
// We can try to pull off part of the file path up to the bundle
// directory level and try any module search paths...
FileSpec bundle_directory;
@@ -362,9 +364,9 @@ Status PlatformDarwin::GetSharedModule(
ModuleSpec new_module_spec(module_spec);
new_module_spec.GetFileSpec() = bundle_directory;
if (Host::ResolveExecutableInBundle(new_module_spec.GetFileSpec())) {
- Status new_error(Platform::GetSharedModule(
- new_module_spec, process, module_sp, nullptr, old_modules,
- did_create_ptr));
+ Status new_error(Platform::GetSharedModule(new_module_spec, process,
+ module_sp, old_modules,
+ did_create_ptr));
if (module_sp)
return new_error;
@@ -376,10 +378,10 @@ Status PlatformDarwin::GetSharedModule(
const size_t bundle_directory_len =
bundle_directory.GetPath(bundle_dir, sizeof(bundle_dir));
char new_path[PATH_MAX];
- size_t num_module_search_paths = module_search_paths_ptr->GetSize();
+ size_t num_module_search_paths = module_search_paths.GetSize();
for (size_t i = 0; i < num_module_search_paths; ++i) {
const size_t search_path_len =
- module_search_paths_ptr->GetFileSpecAtIndex(i).GetPath(
+ module_search_paths.GetFileSpecAtIndex(i).GetPath(
new_path, sizeof(new_path));
if (search_path_len < sizeof(new_path)) {
snprintf(new_path + search_path_len,
@@ -390,7 +392,7 @@ Status PlatformDarwin::GetSharedModule(
ModuleSpec new_module_spec(module_spec);
new_module_spec.GetFileSpec() = new_file_spec;
Status new_error(Platform::GetSharedModule(
- new_module_spec, process, module_sp, nullptr, old_modules,
+ new_module_spec, process, module_sp, old_modules,
did_create_ptr));
if (module_sp) {
@@ -1303,12 +1305,15 @@ PlatformDarwin::LaunchProcess(lldb_private::ProcessLaunchInfo &launch_info) {
lldb_private::Status PlatformDarwin::FindBundleBinaryInExecSearchPaths(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
const FileSpec &platform_file = module_spec.GetFileSpec();
- // See if the file is present in any of the module_search_paths_ptr
+ TargetSP target_sp = module_spec.GetTargetSP();
+ FileSpecList module_search_paths;
+ if (target_sp)
+ module_search_paths = target_sp->GetExecutableSearchPaths();
+ // See if the file is present in any of the module_search_paths
// directories.
- if (!module_sp && module_search_paths_ptr && platform_file) {
+ if (!module_sp && !module_search_paths.IsEmpty() && platform_file) {
// create a vector of all the file / directory names in platform_file e.g.
// this might be
// /System/Library/PrivateFrameworks/UIFoundation.framework/UIFoundation
@@ -1322,21 +1327,21 @@ lldb_private::Status PlatformDarwin::FindBundleBinaryInExecSearchPaths(
std::reverse(path_parts.begin(), path_parts.end());
const size_t path_parts_size = path_parts.size();
- size_t num_module_search_paths = module_search_paths_ptr->GetSize();
+ size_t num_module_search_paths = module_search_paths.GetSize();
for (size_t i = 0; i < num_module_search_paths; ++i) {
Log *log_verbose = GetLog(LLDBLog::Host);
LLDB_LOGF(
log_verbose,
"PlatformRemoteDarwinDevice::GetSharedModule searching for binary in "
"search-path %s",
- module_search_paths_ptr->GetFileSpecAtIndex(i).GetPath().c_str());
+ module_search_paths.GetFileSpecAtIndex(i).GetPath().c_str());
// Create a new FileSpec with this module_search_paths_ptr plus just the
// filename ("UIFoundation"), then the parent dir plus filename
// ("UIFoundation.framework/UIFoundation") etc - up to four names (to
// handle "Foo.framework/Contents/MacOS/Foo")
for (size_t j = 0; j < 4 && j < path_parts_size - 1; ++j) {
- FileSpec path_to_try(module_search_paths_ptr->GetFileSpecAtIndex(i));
+ FileSpec path_to_try(module_search_paths.GetFileSpecAtIndex(i));
// Add the components backwards. For
// .../PrivateFrameworks/UIFoundation.framework/UIFoundation path_parts
@@ -1356,9 +1361,9 @@ lldb_private::Status PlatformDarwin::FindBundleBinaryInExecSearchPaths(
if (FileSystem::Instance().Exists(path_to_try)) {
ModuleSpec new_module_spec(module_spec);
new_module_spec.GetFileSpec() = path_to_try;
- Status new_error(
- Platform::GetSharedModule(new_module_spec, process, module_sp,
- nullptr, old_modules, did_create_ptr));
+ Status new_error(Platform::GetSharedModule(new_module_spec, process,
+ module_sp, old_modules,
+ did_create_ptr));
if (module_sp) {
module_sp->SetPlatformFileSpec(path_to_try);
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
index f8a62ceb958f..82e69e36dca0 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
@@ -73,7 +73,6 @@ public:
Status GetSharedModule(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr) override;
@@ -189,7 +188,7 @@ protected:
Status FindBundleBinaryInExecSearchPaths(
const ModuleSpec &module_spec, Process *process,
- lldb::ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr,
+ lldb::ModuleSP &module_sp,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
// The OSType where lldb is running.
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp
index 68ef81789b08..a72d94ea79c4 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.cpp
@@ -295,7 +295,6 @@ BringInRemoteFile(Platform *platform,
lldb_private::Status PlatformDarwinDevice::GetSharedModuleWithLocalCache(
const lldb_private::ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const lldb_private::FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
Log *log = GetLog(LLDBLog::Platform);
@@ -329,8 +328,7 @@ lldb_private::Status PlatformDarwinDevice::GetSharedModuleWithLocalCache(
ModuleSpec shared_cache_spec(module_spec.GetFileSpec(), image_info.uuid,
image_info.data_sp);
err = ModuleList::GetSharedModule(shared_cache_spec, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
if (module_sp) {
LLDB_LOGF(log, "[%s] module %s was found in the in-memory shared cache",
(IsHost() ? "host" : "remote"),
@@ -348,8 +346,7 @@ lldb_private::Status PlatformDarwinDevice::GetSharedModuleWithLocalCache(
FileSystem::Instance().Resolve(device_support_spec);
if (FileSystem::Instance().Exists(device_support_spec)) {
ModuleSpec local_spec(device_support_spec, module_spec.GetUUID());
- err = ModuleList::GetSharedModule(local_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ err = ModuleList::GetSharedModule(local_spec, module_sp, old_modules,
did_create_ptr);
if (module_sp) {
LLDB_LOGF(log,
@@ -363,8 +360,7 @@ lldb_private::Status PlatformDarwinDevice::GetSharedModuleWithLocalCache(
}
}
- err = ModuleList::GetSharedModule(module_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ err = ModuleList::GetSharedModule(module_spec, module_sp, old_modules,
did_create_ptr);
if (module_sp)
return err;
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h
index e1eba08fb558..e0142ab7ca4c 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinDevice.h
@@ -26,7 +26,6 @@ public:
protected:
virtual Status GetSharedModuleWithLocalCache(
const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
struct SDKDirectoryInfo {
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
index 07c5a523161e..04e87b9dea69 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
@@ -719,7 +719,6 @@ void PlatformDarwinKernel::UpdateKextandKernelsLocalScan() {
Status PlatformDarwinKernel::GetSharedModule(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
Status error;
module_sp.reset();
@@ -734,14 +733,12 @@ Status PlatformDarwinKernel::GetSharedModule(
// UUID search can get here with no name - and it may be a kernel.
if (kext_bundle_id == "mach_kernel" || kext_bundle_id.empty()) {
error = GetSharedModuleKernel(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
if (error.Success() && module_sp) {
return error;
}
} else {
- return GetSharedModuleKext(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
+ return GetSharedModuleKext(module_spec, process, module_sp, old_modules,
did_create_ptr);
}
}
@@ -749,13 +746,11 @@ Status PlatformDarwinKernel::GetSharedModule(
// Give the generic methods, including possibly calling into DebugSymbols
// framework on macOS systems, a chance.
return PlatformDarwin::GetSharedModule(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
}
Status PlatformDarwinKernel::GetSharedModuleKext(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
Status error;
module_sp.reset();
@@ -782,8 +777,7 @@ Status PlatformDarwinKernel::GetSharedModuleKext(
// Give the generic methods, including possibly calling into DebugSymbols
// framework on macOS systems, a chance.
error = PlatformDarwin::GetSharedModule(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
if (error.Success() && module_sp.get()) {
return error;
}
@@ -793,7 +787,6 @@ Status PlatformDarwinKernel::GetSharedModuleKext(
Status PlatformDarwinKernel::GetSharedModuleKernel(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
assert(module_sp.get() == nullptr);
UpdateKextandKernelsLocalScan();
@@ -848,8 +841,7 @@ Status PlatformDarwinKernel::GetSharedModuleKernel(
// Give the generic methods, including possibly calling into DebugSymbols
// framework on macOS systems, a chance.
return PlatformDarwin::GetSharedModule(module_spec, process, module_sp,
- module_search_paths_ptr, old_modules,
- did_create_ptr);
+ old_modules, did_create_ptr);
}
std::vector<lldb_private::FileSpec>
@@ -888,8 +880,8 @@ Status PlatformDarwinKernel::ExamineKextForMatchingUUID(
ModuleSP module_sp(new Module(exe_spec));
if (module_sp && module_sp->GetObjectFile() &&
module_sp->MatchesModuleSpec(exe_spec)) {
- Status error = ModuleList::GetSharedModule(exe_spec, exe_module_sp,
- NULL, NULL, NULL);
+ Status error =
+ ModuleList::GetSharedModule(exe_spec, exe_module_sp, NULL, NULL);
if (exe_module_sp && exe_module_sp->GetObjectFile()) {
return error;
}
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h
index 9db9c0065613..b5cf701a76b4 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h
@@ -60,7 +60,6 @@ public:
Status GetSharedModule(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr) override;
@@ -142,14 +141,14 @@ protected:
Status GetSharedModuleKext(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr);
- Status GetSharedModuleKernel(
- const ModuleSpec &module_spec, Process *process,
- lldb::ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr,
- llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr);
+ Status
+ GetSharedModuleKernel(const ModuleSpec &module_spec, Process *process,
+ lldb::ModuleSP &module_sp,
+ llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
+ bool *did_create_ptr);
Status ExamineKextForMatchingUUID(const FileSpec &kext_bundle_path,
const UUID &uuid, const ArchSpec &arch,
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
index dad6dcd13395..e6ea75a35f92 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
@@ -182,10 +182,8 @@ PlatformMacOSX::GetSupportedArchitectures(const ArchSpec &process_host_arch) {
lldb_private::Status PlatformMacOSX::GetSharedModule(
const lldb_private::ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const lldb_private::FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
Status error = GetSharedModuleWithLocalCache(module_spec, module_sp,
- module_search_paths_ptr,
old_modules, did_create_ptr);
if (module_sp) {
@@ -199,9 +197,9 @@ lldb_private::Status PlatformMacOSX::GetSharedModule(
lldb::ModuleSP x86_64_module_sp;
llvm::SmallVector<lldb::ModuleSP, 1> old_x86_64_modules;
bool did_create = false;
- Status x86_64_error = GetSharedModuleWithLocalCache(
- module_spec_x86_64, x86_64_module_sp, module_search_paths_ptr,
- &old_x86_64_modules, &did_create);
+ Status x86_64_error =
+ GetSharedModuleWithLocalCache(module_spec_x86_64, x86_64_module_sp,
+ &old_x86_64_modules, &did_create);
if (x86_64_module_sp && x86_64_module_sp->GetObjectFile()) {
module_sp = x86_64_module_sp;
if (old_modules)
@@ -217,7 +215,6 @@ lldb_private::Status PlatformMacOSX::GetSharedModule(
if (!module_sp) {
error = FindBundleBinaryInExecSearchPaths(module_spec, process, module_sp,
- module_search_paths_ptr,
old_modules, did_create_ptr);
}
return error;
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
index be844856ef92..9555b16551d5 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h
@@ -48,7 +48,6 @@ public:
Status GetSharedModule(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr) override;
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
index b83d07b19235..53fab93f5e70 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
@@ -53,7 +53,7 @@ void PlatformRemoteDarwinDevice::GetStatus(Stream &strm) {
if (sdk_directory)
strm.Printf(" SDK Path: \"%s\"\n", sdk_directory);
else
- strm.PutCString(" SDK Path: error: unable to locate SDK\n");
+ strm.PutCString(" SDK Path: <unable to locate SDK>\n");
const uint32_t num_sdk_infos = m_sdk_directory_infos.size();
for (uint32_t i = 0; i < num_sdk_infos; ++i) {
@@ -158,7 +158,6 @@ Status PlatformRemoteDarwinDevice::GetSymbolFile(const FileSpec &platform_file,
Status PlatformRemoteDarwinDevice::GetSharedModule(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<ModuleSP> *old_modules, bool *did_create_ptr) {
// For iOS, the SDK files are all cached locally on the host system. So first
// we ask for the file in the cached SDK, then we attempt to get a shared
@@ -185,7 +184,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
if (GetFileInSDK(platform_file_path, connected_sdk_idx,
platform_module_spec.GetFileSpec())) {
module_sp.reset();
- error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
+ error = ResolveExecutable(platform_module_spec, module_sp);
if (module_sp) {
m_last_module_sdk_idx = connected_sdk_idx;
error.Clear();
@@ -202,7 +201,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
if (GetFileInSDK(platform_file_path, m_last_module_sdk_idx,
platform_module_spec.GetFileSpec())) {
module_sp.reset();
- error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
+ error = ResolveExecutable(platform_module_spec, module_sp);
if (module_sp) {
error.Clear();
return error;
@@ -224,7 +223,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
if (GetFileInSDK(platform_file_path, current_sdk_idx,
platform_module_spec.GetFileSpec())) {
module_sp.reset();
- error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
+ error = ResolveExecutable(platform_module_spec, module_sp);
if (module_sp) {
m_last_module_sdk_idx = current_sdk_idx;
error.Clear();
@@ -245,7 +244,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
platform_module_spec.GetFileSpec())) {
// printf ("sdk[%u]: '%s'\n", sdk_idx, local_file.GetPath().c_str());
- error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
+ error = ResolveExecutable(platform_module_spec, module_sp);
if (module_sp) {
// Remember the index of the last SDK that we found a file in in case
// the wrong SDK was selected.
@@ -261,8 +260,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
// This may not be an SDK-related module. Try whether we can bring in the
// thing to our local cache.
- error = GetSharedModuleWithLocalCache(module_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ error = GetSharedModuleWithLocalCache(module_spec, module_sp, old_modules,
did_create_ptr);
if (error.Success())
return error;
@@ -271,15 +269,13 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
// directories.
if (!module_sp)
error = PlatformDarwin::FindBundleBinaryInExecSearchPaths(
- module_spec, process, module_sp, module_search_paths_ptr, old_modules,
- did_create_ptr);
+ module_spec, process, module_sp, old_modules, did_create_ptr);
if (error.Success())
return error;
const bool always_create = false;
- error = ModuleList::GetSharedModule(module_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ error = ModuleList::GetSharedModule(module_spec, module_sp, old_modules,
did_create_ptr, always_create);
if (module_sp)
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
index 557f4876e91a..4abd74ed0758 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.h
@@ -47,7 +47,6 @@ public:
Status GetSharedModule(const ModuleSpec &module_spec, Process *process,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules,
bool *did_create_ptr) override;
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index b7029fb3a95b..f8e33eac614a 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -84,8 +84,9 @@ bool ProcessElfCore::CanDebug(lldb::TargetSP target_sp,
// For now we are just making sure the file exists for a given module
if (!m_core_module_sp && FileSystem::Instance().Exists(m_core_file)) {
ModuleSpec core_module_spec(m_core_file, target_sp->GetArchitecture());
+ core_module_spec.SetTarget(target_sp);
Status error(ModuleList::GetSharedModule(core_module_spec, m_core_module_sp,
- nullptr, nullptr, nullptr));
+ nullptr, nullptr));
if (m_core_module_sp) {
ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
if (core_objfile && core_objfile->GetType() == ObjectFile::eTypeCoreFile)
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index a780b3f59ade..83d684e9ca52 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -95,8 +95,9 @@ bool ProcessMachCore::CanDebug(lldb::TargetSP target_sp,
// header but we should still try to use it -
// ModuleSpecList::FindMatchingModuleSpec enforces a strict arch mach.
ModuleSpec core_module_spec(m_core_file);
+ core_module_spec.SetTarget(target_sp);
Status error(ModuleList::GetSharedModule(core_module_spec, m_core_module_sp,
- nullptr, nullptr, nullptr));
+ nullptr, nullptr));
if (m_core_module_sp) {
ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h
index 6e01e2fd7653..b6b77c4a7d16 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h
@@ -9,7 +9,6 @@
#ifndef LLDB_SOURCE_PLUGINS_SCRIPTED_FRAME_H
#define LLDB_SOURCE_PLUGINS_SCRIPTED_FRAME_H
-#include "Plugins/Process/Utility/RegisterContextMemory.h"
#include "ScriptedThread.h"
#include "lldb/Interpreter/ScriptInterpreter.h"
#include "lldb/Target/DynamicRegisterInfo.h"
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt
index 09103573b89c..50569cdefaaf 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt
@@ -23,6 +23,7 @@ add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces PLUGIN
OperatingSystemPythonInterface.cpp
ScriptInterpreterPythonInterfaces.cpp
ScriptedFramePythonInterface.cpp
+ ScriptedFrameProviderPythonInterface.cpp
ScriptedPlatformPythonInterface.cpp
ScriptedProcessPythonInterface.cpp
ScriptedPythonInterface.cpp
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.h
index 3814f4661507..b2a347951d0f 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.h
@@ -17,6 +17,7 @@
#include "OperatingSystemPythonInterface.h"
#include "ScriptedBreakpointPythonInterface.h"
+#include "ScriptedFrameProviderPythonInterface.h"
#include "ScriptedFramePythonInterface.h"
#include "ScriptedPlatformPythonInterface.h"
#include "ScriptedProcessPythonInterface.h"
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp
new file mode 100644
index 000000000000..b866bf332b7b
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Config.h"
+#include "lldb/Target/Thread.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/lldb-enumerations.h"
+
+#if LLDB_ENABLE_PYTHON
+
+// LLDB Python header must be included first
+#include "../lldb-python.h"
+
+#include "../SWIGPythonBridge.h"
+#include "../ScriptInterpreterPythonImpl.h"
+#include "ScriptedFrameProviderPythonInterface.h"
+#include <optional>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::python;
+using Locker = ScriptInterpreterPythonImpl::Locker;
+
+ScriptedFrameProviderPythonInterface::ScriptedFrameProviderPythonInterface(
+ ScriptInterpreterPythonImpl &interpreter)
+ : ScriptedFrameProviderInterface(), ScriptedPythonInterface(interpreter) {}
+
+llvm::Expected<StructuredData::GenericSP>
+ScriptedFrameProviderPythonInterface::CreatePluginObject(
+ const llvm::StringRef class_name, lldb::StackFrameListSP input_frames,
+ StructuredData::DictionarySP args_sp) {
+ if (!input_frames)
+ return llvm::createStringError("Invalid frame list");
+
+ StructuredDataImpl sd_impl(args_sp);
+ return ScriptedPythonInterface::CreatePluginObject(class_name, nullptr,
+ input_frames, sd_impl);
+}
+
+StructuredData::ObjectSP
+ScriptedFrameProviderPythonInterface::GetFrameAtIndex(uint32_t index) {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_frame_at_index", error, index);
+
+ if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj,
+ error))
+ return {};
+
+ return obj;
+}
+
+#endif
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h
new file mode 100644
index 000000000000..fd163984028d
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDFRAMEPROVIDERPYTHONINTERFACE_H
+#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDFRAMEPROVIDERPYTHONINTERFACE_H
+
+#include "lldb/Host/Config.h"
+
+#if LLDB_ENABLE_PYTHON
+
+#include "ScriptedPythonInterface.h"
+#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h"
+#include <optional>
+
+namespace lldb_private {
+class ScriptedFrameProviderPythonInterface
+ : public ScriptedFrameProviderInterface,
+ public ScriptedPythonInterface {
+public:
+ ScriptedFrameProviderPythonInterface(
+ ScriptInterpreterPythonImpl &interpreter);
+
+ llvm::Expected<StructuredData::GenericSP>
+ CreatePluginObject(llvm::StringRef class_name,
+ lldb::StackFrameListSP input_frames,
+ StructuredData::DictionarySP args_sp) override;
+
+ llvm::SmallVector<AbstractMethodRequirement>
+ GetAbstractMethodRequirements() const override {
+ return llvm::SmallVector<AbstractMethodRequirement>(
+ {{"get_frame_at_index"}});
+ }
+
+ StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) override;
+};
+} // namespace lldb_private
+
+#endif // LLDB_ENABLE_PYTHON
+#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDFRAMEPROVIDERPYTHONINTERFACE_H
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp
index 4fdf2b12a550..af2e0b5df4d2 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp
@@ -243,4 +243,21 @@ ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::DescriptionLevel>(
return static_cast<lldb::DescriptionLevel>(unsigned_val);
}
+template <>
+lldb::StackFrameListSP
+ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::StackFrameListSP>(
+ python::PythonObject &p, Status &error) {
+
+ lldb::SBFrameList *sb_frame_list = reinterpret_cast<lldb::SBFrameList *>(
+ python::LLDBSWIGPython_CastPyObjectToSBFrameList(p.get()));
+
+ if (!sb_frame_list) {
+ error = Status::FromErrorStringWithFormat(
+ "couldn't cast lldb::SBFrameList to lldb::StackFrameListSP.");
+ return {};
+ }
+
+ return m_interpreter.GetOpaqueTypeFromSBFrameList(*sb_frame_list);
+}
+
#endif
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
index 2335b2ef0f17..ec1dd9910d8a 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
@@ -444,6 +444,14 @@ protected:
return python::SWIGBridge::ToSWIGWrapper(arg);
}
+ python::PythonObject Transform(lldb::ThreadSP arg) {
+ return python::SWIGBridge::ToSWIGWrapper(arg);
+ }
+
+ python::PythonObject Transform(lldb::StackFrameListSP arg) {
+ return python::SWIGBridge::ToSWIGWrapper(arg);
+ }
+
python::PythonObject Transform(lldb::ThreadPlanSP arg) {
return python::SWIGBridge::ToSWIGWrapper(arg);
}
@@ -628,6 +636,11 @@ lldb::DescriptionLevel
ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::DescriptionLevel>(
python::PythonObject &p, Status &error);
+template <>
+lldb::StackFrameListSP
+ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::StackFrameListSP>(
+ python::PythonObject &p, Status &error);
+
} // namespace lldb_private
#endif // LLDB_ENABLE_PYTHON
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
index 27f5d2ee471c..2c971262fc34 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
@@ -93,6 +93,7 @@ public:
static PythonObject ToSWIGWrapper(const StructuredDataImpl &data_impl);
static PythonObject ToSWIGWrapper(lldb::ThreadSP thread_sp);
static PythonObject ToSWIGWrapper(lldb::StackFrameSP frame_sp);
+ static PythonObject ToSWIGWrapper(lldb::StackFrameListSP frames_sp);
static PythonObject ToSWIGWrapper(lldb::DebuggerSP debugger_sp);
static PythonObject ToSWIGWrapper(lldb::WatchpointSP watchpoint_sp);
static PythonObject ToSWIGWrapper(lldb::BreakpointLocationSP bp_loc_sp);
@@ -269,6 +270,7 @@ void *LLDBSWIGPython_CastPyObjectToSBSymbolContext(PyObject *data);
void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data);
void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(PyObject *data);
void *LLDBSWIGPython_CastPyObjectToSBExecutionContext(PyObject *data);
+void *LLDBSWIGPython_CastPyObjectToSBFrameList(PyObject *data);
} // namespace python
} // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index d257a08a2c62..3493fa9fef63 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -1526,6 +1526,11 @@ ScriptInterpreterPythonImpl::CreateScriptedFrameInterface() {
return std::make_shared<ScriptedFramePythonInterface>(*this);
}
+ScriptedFrameProviderInterfaceSP
+ScriptInterpreterPythonImpl::CreateScriptedFrameProviderInterface() {
+ return std::make_shared<ScriptedFrameProviderPythonInterface>(*this);
+}
+
ScriptedThreadPlanInterfaceSP
ScriptInterpreterPythonImpl::CreateScriptedThreadPlanInterface() {
return std::make_shared<ScriptedThreadPlanPythonInterface>(*this);
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
index 00ae59c1c424..ad2ddd2219e8 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
@@ -101,6 +101,9 @@ public:
lldb::ScriptedFrameInterfaceSP CreateScriptedFrameInterface() override;
+ lldb::ScriptedFrameProviderInterfaceSP
+ CreateScriptedFrameProviderInterface() override;
+
lldb::ScriptedThreadPlanInterfaceSP
CreateScriptedThreadPlanInterface() override;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 881268bc4ca0..f00e94aee984 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -2018,7 +2018,7 @@ void SymbolFileDWARF::UpdateExternalModuleListIfNeeded() {
}
Status error = ModuleList::GetSharedModule(dwo_module_spec, module_sp,
- nullptr, nullptr, nullptr);
+ nullptr, nullptr);
if (!module_sp) {
// ReportWarning also rate-limits based on the warning string,
// but in a -gmodules build, each object file has a similar DAG
diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt
index 8e6d51efad1f..cff59049cdce 100644
--- a/lldb/source/Target/CMakeLists.txt
+++ b/lldb/source/Target/CMakeLists.txt
@@ -38,6 +38,7 @@ add_lldb_library(lldbTarget
RegisterNumber.cpp
RemoteAwarePlatform.cpp
ScriptedThreadPlan.cpp
+ SyntheticFrameProvider.cpp
SectionLoadHistory.cpp
SectionLoadList.cpp
StackFrame.cpp
diff --git a/lldb/source/Target/ModuleCache.cpp b/lldb/source/Target/ModuleCache.cpp
index f737836e0d97..997894610545 100644
--- a/lldb/source/Target/ModuleCache.cpp
+++ b/lldb/source/Target/ModuleCache.cpp
@@ -255,7 +255,7 @@ Status ModuleCache::Get(const FileSpec &root_dir_spec, const char *hostname,
cached_module_spec.GetPlatformFileSpec() = module_spec.GetFileSpec();
error = ModuleList::GetSharedModule(cached_module_spec, cached_module_sp,
- nullptr, nullptr, did_create_ptr, false);
+ nullptr, did_create_ptr, false);
if (error.Fail())
return error;
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index 8681adaf5ea7..5b0930cf26b7 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -163,11 +163,12 @@ Platform::LocateExecutableScriptingResources(Target *target, Module &module,
Status Platform::GetSharedModule(
const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
llvm::SmallVectorImpl<lldb::ModuleSP> *old_modules, bool *did_create_ptr) {
if (IsHost())
- return ModuleList::GetSharedModule(module_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ // Note: module_search_paths_ptr functionality is now handled internally
+ // by getting target from module_spec and calling
+ // target->GetExecutableSearchPaths()
+ return ModuleList::GetSharedModule(module_spec, module_sp, old_modules,
did_create_ptr, false);
// Module resolver lambda.
@@ -180,16 +181,14 @@ Status Platform::GetSharedModule(
resolved_spec = spec;
resolved_spec.GetFileSpec().PrependPathComponent(m_sdk_sysroot);
// Try to get shared module with resolved spec.
- error = ModuleList::GetSharedModule(resolved_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ error = ModuleList::GetSharedModule(resolved_spec, module_sp, old_modules,
did_create_ptr, false);
}
// If we don't have sysroot or it didn't work then
// try original module spec.
if (!error.Success()) {
resolved_spec = spec;
- error = ModuleList::GetSharedModule(resolved_spec, module_sp,
- module_search_paths_ptr, old_modules,
+ error = ModuleList::GetSharedModule(resolved_spec, module_sp, old_modules,
did_create_ptr, false);
}
if (error.Success() && module_sp)
@@ -731,10 +730,8 @@ bool Platform::SetOSVersion(llvm::VersionTuple version) {
return false;
}
-Status
-Platform::ResolveExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr) {
+Status Platform::ResolveExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp) {
// We may connect to a process and use the provided executable (Don't use
// local $PATH).
@@ -750,9 +747,8 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec,
if (resolved_module_spec.GetArchitecture().IsValid() ||
resolved_module_spec.GetUUID().IsValid()) {
- Status error =
- ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr, nullptr, nullptr);
+ Status error = ModuleList::GetSharedModule(resolved_module_spec,
+ exe_module_sp, nullptr, nullptr);
if (exe_module_sp && exe_module_sp->GetObjectFile())
return error;
@@ -767,9 +763,9 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec,
Status error;
for (const ArchSpec &arch : GetSupportedArchitectures(process_host_arch)) {
resolved_module_spec.GetArchitecture() = arch;
- error =
- ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr, nullptr, nullptr);
+
+ error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
+ nullptr, nullptr);
if (error.Success()) {
if (exe_module_sp && exe_module_sp->GetObjectFile())
break;
@@ -1446,16 +1442,13 @@ const std::vector<ConstString> &Platform::GetTrapHandlerSymbolNames() {
return m_trap_handlers;
}
-Status
-Platform::GetCachedExecutable(ModuleSpec &module_spec,
- lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr) {
+Status Platform::GetCachedExecutable(ModuleSpec &module_spec,
+ lldb::ModuleSP &module_sp) {
FileSpec platform_spec = module_spec.GetFileSpec();
Status error = GetRemoteSharedModule(
module_spec, nullptr, module_sp,
[&](const ModuleSpec &spec) {
- return Platform::ResolveExecutable(spec, module_sp,
- module_search_paths_ptr);
+ return Platform::ResolveExecutable(spec, module_sp);
},
nullptr);
if (error.Success()) {
@@ -1497,7 +1490,7 @@ Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
for (const ArchSpec &arch : GetSupportedArchitectures(process_host_arch)) {
arch_module_spec.GetArchitecture() = arch;
error = ModuleList::GetSharedModule(arch_module_spec, module_sp, nullptr,
- nullptr, nullptr);
+ nullptr);
// Did we find an executable using one of the
if (error.Success() && module_sp)
break;
@@ -1673,11 +1666,12 @@ void Platform::CallLocateModuleCallbackIfSet(const ModuleSpec &module_spec,
cached_module_spec.GetUUID().Clear(); // Clear UUID since it may contain md5
// content hash instead of real UUID.
cached_module_spec.GetFileSpec() = module_file_spec;
+ cached_module_spec.GetSymbolFileSpec() = symbol_file_spec;
cached_module_spec.GetPlatformFileSpec() = module_spec.GetFileSpec();
cached_module_spec.SetObjectOffset(0);
error = ModuleList::GetSharedModule(cached_module_spec, module_sp, nullptr,
- nullptr, did_create_ptr, false);
+ did_create_ptr, false, false);
if (error.Success() && module_sp) {
// Succeeded to load the module file.
LLDB_LOGF(log, "%s: locate module callback succeeded: module=%s symbol=%s",
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 42ce198a283d..69edea503002 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -3256,6 +3256,7 @@ Status Process::ConnectRemote(llvm::StringRef remote_url) {
if (state == eStateStopped || state == eStateCrashed) {
// If we attached and actually have a process on the other end, then
// this ended up being the equivalent of an attach.
+ SetShouldDetach(true);
CompleteAttach();
// This delays passing the stopped event to listeners till
diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp b/lldb/source/Target/RemoteAwarePlatform.cpp
index cac738ea67b4..89b946ba7516 100644
--- a/lldb/source/Target/RemoteAwarePlatform.cpp
+++ b/lldb/source/Target/RemoteAwarePlatform.cpp
@@ -29,9 +29,8 @@ bool RemoteAwarePlatform::GetModuleSpec(const FileSpec &module_file_spec,
return false;
}
-Status RemoteAwarePlatform::ResolveExecutable(
- const ModuleSpec &module_spec, lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr) {
+Status RemoteAwarePlatform::ResolveExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp) {
ModuleSpec resolved_module_spec(module_spec);
// The host platform can resolve the path more aggressively.
@@ -47,12 +46,10 @@ Status RemoteAwarePlatform::ResolveExecutable(
if (!FileSystem::Instance().Exists(resolved_file_spec))
FileSystem::Instance().ResolveExecutableLocation(resolved_file_spec);
} else if (m_remote_platform_sp) {
- return GetCachedExecutable(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr);
+ return GetCachedExecutable(resolved_module_spec, exe_module_sp);
}
- return Platform::ResolveExecutable(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr);
+ return Platform::ResolveExecutable(resolved_module_spec, exe_module_sp);
}
Status RemoteAwarePlatform::RunShellCommand(
diff --git a/lldb/source/Target/SyntheticFrameProvider.cpp b/lldb/source/Target/SyntheticFrameProvider.cpp
new file mode 100644
index 000000000000..241ce82c39be
--- /dev/null
+++ b/lldb/source/Target/SyntheticFrameProvider.cpp
@@ -0,0 +1,100 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/SyntheticFrameProvider.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Target/Thread.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Status.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+SyntheticFrameProvider::SyntheticFrameProvider(StackFrameListSP input_frames)
+ : m_input_frames(std::move(input_frames)) {}
+
+SyntheticFrameProvider::~SyntheticFrameProvider() = default;
+
+void SyntheticFrameProviderDescriptor::Dump(Stream *s) const {
+ if (!s)
+ return;
+
+ s->Printf(" Name: %s\n", GetName().str().c_str());
+
+ // Show thread filter information.
+ if (thread_specs.empty()) {
+ s->PutCString(" Thread Filter: (applies to all threads)\n");
+ } else {
+ s->Printf(" Thread Filter: %zu specification(s)\n", thread_specs.size());
+ for (size_t i = 0; i < thread_specs.size(); ++i) {
+ const ThreadSpec &spec = thread_specs[i];
+ s->Printf(" [%zu] ", i);
+ spec.GetDescription(s, lldb::eDescriptionLevelVerbose);
+ s->PutChar('\n');
+ }
+ }
+}
+
+llvm::Expected<SyntheticFrameProviderSP> SyntheticFrameProvider::CreateInstance(
+ StackFrameListSP input_frames,
+ const SyntheticFrameProviderDescriptor &descriptor) {
+ if (!input_frames)
+ return llvm::createStringError(
+ "cannot create synthetic frame provider: invalid input frames");
+
+ // Iterate through all registered ScriptedFrameProvider plugins.
+ ScriptedFrameProviderCreateInstance create_callback = nullptr;
+ for (uint32_t idx = 0;
+ (create_callback =
+ PluginManager::GetScriptedFrameProviderCreateCallbackAtIndex(
+ idx)) != nullptr;
+ ++idx) {
+ auto provider_or_err = create_callback(input_frames, descriptor);
+ if (!provider_or_err) {
+ LLDB_LOG_ERROR(GetLog(LLDBLog::Target), provider_or_err.takeError(),
+ "Failed to create synthetic frame provider: {0}");
+ continue;
+ }
+
+ if (auto frame_provider_up = std::move(*provider_or_err))
+ return std::move(frame_provider_up);
+ }
+
+ return llvm::createStringError(
+ "cannot create synthetic frame provider: no suitable plugin found");
+}
+
+llvm::Expected<SyntheticFrameProviderSP> SyntheticFrameProvider::CreateInstance(
+ StackFrameListSP input_frames, llvm::StringRef plugin_name,
+ const std::vector<ThreadSpec> &thread_specs) {
+ if (!input_frames)
+ return llvm::createStringError(
+ "cannot create synthetic frame provider: invalid input frames");
+
+ // Look up the specific C++ plugin by name.
+ SyntheticFrameProviderCreateInstance create_callback =
+ PluginManager::GetSyntheticFrameProviderCreateCallbackForPluginName(
+ plugin_name);
+
+ if (!create_callback)
+ return llvm::createStringError(
+ "cannot create synthetic frame provider: C++ plugin '%s' not found",
+ plugin_name.str().c_str());
+
+ auto provider_or_err = create_callback(input_frames, thread_specs);
+ if (!provider_or_err)
+ return provider_or_err.takeError();
+
+ if (auto frame_provider_sp = std::move(*provider_or_err))
+ return std::move(frame_provider_sp);
+
+ return llvm::createStringError(
+ "cannot create synthetic frame provider: C++ plugin '%s' returned null",
+ plugin_name.str().c_str());
+}
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index e53fc7a1e1bd..3b51e17d1c4e 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -1779,9 +1779,9 @@ bool Target::SetArchitecture(const ArchSpec &arch_spec, bool set_platform,
arch_spec.GetArchitectureName(),
arch_spec.GetTriple().getTriple().c_str());
ModuleSpec module_spec(executable_sp->GetFileSpec(), other);
- FileSpecList search_paths = GetExecutableSearchPaths();
+ module_spec.SetTarget(shared_from_this());
Status error = ModuleList::GetSharedModule(module_spec, executable_sp,
- &search_paths, nullptr, nullptr);
+ nullptr, nullptr);
if (!error.Fail() && executable_sp) {
SetExecutableModule(executable_sp, eLoadDependentsYes);
@@ -2350,6 +2350,7 @@ ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec,
// Apply any remappings specified in target.object-map:
ModuleSpec module_spec(orig_module_spec);
+ module_spec.SetTarget(shared_from_this());
PathMappingList &obj_mapping = GetObjectPathMap();
if (std::optional<FileSpec> remapped_obj_file =
obj_mapping.RemapPath(orig_module_spec.GetFileSpec().GetPath(),
@@ -2408,9 +2409,9 @@ ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec,
transformed_spec.GetFileSpec().SetDirectory(transformed_dir);
transformed_spec.GetFileSpec().SetFilename(
module_spec.GetFileSpec().GetFilename());
+ transformed_spec.SetTarget(shared_from_this());
error = ModuleList::GetSharedModule(transformed_spec, module_sp,
- &search_paths, &old_modules,
- &did_create_module);
+ &old_modules, &did_create_module);
}
}
}
@@ -2426,9 +2427,8 @@ ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec,
// cache.
if (module_spec.GetUUID().IsValid()) {
// We have a UUID, it is OK to check the global module list...
- error =
- ModuleList::GetSharedModule(module_spec, module_sp, &search_paths,
- &old_modules, &did_create_module);
+ error = ModuleList::GetSharedModule(module_spec, module_sp,
+ &old_modules, &did_create_module);
}
if (!module_sp) {
@@ -2436,8 +2436,8 @@ ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec,
// module in the shared module cache.
if (m_platform_sp) {
error = m_platform_sp->GetSharedModule(
- module_spec, m_process_sp.get(), module_sp, &search_paths,
- &old_modules, &did_create_module);
+ module_spec, m_process_sp.get(), module_sp, &old_modules,
+ &did_create_module);
} else {
error = Status::FromErrorString("no platform is currently set");
}
@@ -3207,6 +3207,11 @@ bool Target::RunStopHooks(bool at_initial_stop) {
bool should_stop = false;
bool requested_continue = false;
+ // A stop hook might get deleted while running stop hooks.
+ // We have to decide what that means. We will follow the rule that deleting
+ // a stop hook while processing these stop hooks will delete it for FUTURE
+ // stops but not this stop. Fortunately, copying the m_stop_hooks to the
+ // active_hooks list before iterating over the hooks has this effect.
for (auto cur_hook_sp : active_hooks) {
bool any_thread_matched = false;
for (auto exc_ctx : exc_ctx_with_reasons) {
diff --git a/lldb/source/Target/TargetList.cpp b/lldb/source/Target/TargetList.cpp
index 188c2508a71e..2e03bc1e38ea 100644
--- a/lldb/source/Target/TargetList.cpp
+++ b/lldb/source/Target/TargetList.cpp
@@ -304,13 +304,9 @@ Status TargetList::CreateTargetInternal(Debugger &debugger,
ModuleSP exe_module_sp;
if (platform_sp) {
- FileSpecList executable_search_paths(
- Target::GetDefaultExecutableSearchPaths());
ModuleSpec module_spec(file, arch);
- error = platform_sp->ResolveExecutable(module_spec, exe_module_sp,
- executable_search_paths.GetSize()
- ? &executable_search_paths
- : nullptr);
+ module_spec.SetTarget(target_sp);
+ error = platform_sp->ResolveExecutable(module_spec, exe_module_sp);
}
if (error.Success() && exe_module_sp) {
diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py
index 954cac159243..8e91781b87a3 100644
--- a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py
+++ b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py
@@ -48,6 +48,39 @@ class TestStopHooks(TestBase):
"Got the right error",
)
+ def test_self_deleting(self):
+ """Test that we can handle a stop hook that deletes itself"""
+ self.script_setup()
+ # Run to the first breakpoint before setting the stop hook
+ # so we don't have to figure out where it showed up in the new
+ # target.
+ (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+ self, "Stop here first", self.main_source_file
+ )
+
+ # Now add our stop hook and register it:
+ result = lldb.SBCommandReturnObject()
+ command = "target stop-hook add -P stop_hook.self_deleting_stop"
+ self.interp.HandleCommand(command, result)
+ self.assertCommandReturn(result, f"Added my stop hook: {result.GetError()}")
+
+ result_str = result.GetOutput()
+ p = re.compile("Stop hook #([0-9]+) added.")
+ m = p.match(result_str)
+ current_stop_hook_id = m.group(1)
+ command = "command script add -o -f stop_hook.handle_stop_hook_id handle_id"
+ self.interp.HandleCommand(command, result)
+ self.assertCommandReturn(result, "Added my command")
+
+ command = f"handle_id {current_stop_hook_id}"
+ self.interp.HandleCommand(command, result)
+ self.assertCommandReturn(result, "Registered my stop ID")
+
+ # Now step the process and make sure the stop hook was deleted.
+ thread.StepOver()
+ self.interp.HandleCommand("target stop-hook list", result)
+ self.assertEqual(result.GetOutput().rstrip(), "No stop hooks.", "Deleted hook")
+
def test_stop_hooks_scripted(self):
"""Test that a scripted stop hook works with no specifiers"""
self.stop_hooks_scripted(5, "-I false")
diff --git a/lldb/test/API/commands/target/stop-hooks/stop_hook.py b/lldb/test/API/commands/target/stop-hooks/stop_hook.py
index cb7a4337c40d..a41190baeadf 100644
--- a/lldb/test/API/commands/target/stop-hooks/stop_hook.py
+++ b/lldb/test/API/commands/target/stop-hooks/stop_hook.py
@@ -48,3 +48,28 @@ class bad_handle_stop:
class no_handle_stop:
def __init__(self, target, extra_args, dict):
print("I am okay")
+
+
+class self_deleting_stop:
+ def __init__(self, target, extra_args, dict):
+ self.target = target
+
+ def handle_stop(self, exe_ctx, stream):
+ interp = exe_ctx.target.debugger.GetCommandInterpreter()
+ result = lldb.SBCommandReturnObject()
+ interp.HandleCommand("handle_id", result)
+ id_str = result.GetOutput().rstrip()
+
+ command = f"target stop-hook delete {id_str}"
+ interp.HandleCommand(command, result)
+
+
+stop_hook_id = 0
+
+
+def handle_stop_hook_id(debugger, command, exe_ctx, result, extra_args):
+ global stop_hook_id
+ if command == "":
+ result.AppendMessage(str(stop_hook_id))
+ else:
+ stop_hook_id = int(command)
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/map/TestDataFormatterStdMap.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/map/TestDataFormatterStdMap.py
index 07d6c963eb05..ca2d2d6b4954 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/map/TestDataFormatterStdMap.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/map/TestDataFormatterStdMap.py
@@ -9,6 +9,8 @@ from lldbsuite.test import lldbutil
class StdMapDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
TestBase.setUp(self)
ns = "ndk" if lldbplatformutil.target_is_android() else ""
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multimap/TestDataFormatterGenericMultiMap.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multimap/TestDataFormatterGenericMultiMap.py
index 7ac79714db88..4b0854b180e0 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multimap/TestDataFormatterGenericMultiMap.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multimap/TestDataFormatterGenericMultiMap.py
@@ -11,6 +11,8 @@ from lldbsuite.test import lldbutil
class GenericMultiMapDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
TestBase.setUp(self)
self.namespace = "std"
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
index 7e922fccdf7d..e846e072777f 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
@@ -10,6 +10,8 @@ from lldbsuite.test import lldbutil
class GenericMultiSetDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
TestBase.setUp(self)
self.namespace = "std"
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
index 1ac5e323e23e..355f0c6edba1 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
@@ -10,6 +10,8 @@ from lldbsuite.test import lldbutil
class GenericSetDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
TestBase.setUp(self)
self.namespace = "std"
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string/TestDataFormatterStdString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string/TestDataFormatterStdString.py
index 6a27b5d2f078..00047e419de3 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string/TestDataFormatterStdString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string/TestDataFormatterStdString.py
@@ -11,6 +11,8 @@ from lldbsuite.test import lldbutil
class StdStringDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
# Call super's setUp().
TestBase.setUp(self)
@@ -18,6 +20,17 @@ class StdStringDataFormatterTestCase(TestBase):
self.main_spec = lldb.SBFileSpec("main.cpp")
self.namespace = "std"
+ def _makeStringName(self, typedef: str, char_type: str, allocator=None):
+ if allocator is None:
+ allocator = self.namespace + "::allocator"
+
+ if self.getDebugInfo() == "pdb":
+ return f"{self.namespace}::basic_string<{char_type}, std::char_traits<{char_type}>, {allocator}<{char_type}>>"
+
+ if typedef.startswith("::"):
+ return self.namespace + typedef
+ return typedef
+
def do_test(self):
"""Test that that file and class static variables display correctly."""
(target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
@@ -36,10 +49,17 @@ class StdStringDataFormatterTestCase(TestBase):
# Execute the cleanup function during test case tear down.
self.addTearDownHook(cleanup)
- ns = self.namespace
+ string_name = self._makeStringName("::string", "char")
+ wstring_name = self._makeStringName("::wstring", "wchar_t")
+ custom_string_name = self._makeStringName(
+ "CustomString", "char", allocator="CustomAlloc"
+ )
+ custom_wstring_name = self._makeStringName(
+ "CustomWString", "wchar_t", allocator="CustomAlloc"
+ )
# Check 'S' pre-assignment.
- self.expect("frame variable S", substrs=['(%s::wstring) S = L"!!!!"' % ns])
+ self.expect("frame variable S", substrs=[f'({wstring_name}) S = L"!!!!"'])
thread.StepOver()
@@ -54,34 +74,31 @@ class StdStringDataFormatterTestCase(TestBase):
)
self.expect_expr(
- "s", result_type=ns + "::wstring", result_summary='L"hello world! מזל טוב!"'
+ "s", result_type=wstring_name, result_summary='L"hello world! מזל טוב!"'
)
- self.expect_expr(
- "q", result_type=ns + "::string", result_summary='"hello world"'
- )
+ self.expect_expr("q", result_type=string_name, result_summary='"hello world"')
self.expect_expr(
"Q",
- result_type=ns + "::string",
+ result_type=string_name,
result_summary='"quite a long std::strin with lots of info inside it"',
)
self.expect(
"frame variable",
substrs=[
- '(%s::wstring) wempty = L""' % ns,
- '(%s::wstring) s = L"hello world! מזל טוב!"' % ns,
- '(%s::wstring) S = L"!!!!!"' % ns,
+ f'({wstring_name}) wempty = L""',
+ f'({wstring_name}) s = L"hello world! מזל טוב!"',
+ f'({wstring_name}) S = L"!!!!!"',
"(const wchar_t *) mazeltov = 0x",
'L"מזל טוב"',
- '(%s::string) empty = ""' % ns,
- '(%s::string) q = "hello world"' % ns,
- '(%s::string) Q = "quite a long std::strin with lots of info inside it"'
- % ns,
- "(%s::string *) null_str = nullptr" % ns,
- '(CustomString) custom_str = "hello!"',
- '(CustomWString) custom_wstr = L"hello!"',
+ f'({string_name}) empty = ""',
+ f'({string_name}) q = "hello world"',
+ f'({string_name}) Q = "quite a long std::strin with lots of info inside it"',
+ f"({string_name} *) null_str = nullptr",
+ f'({custom_string_name}) custom_str = "hello!"',
+ f'({custom_wstring_name}) custom_wstr = L"hello!"',
],
)
@@ -136,19 +153,26 @@ class StdStringDataFormatterTestCase(TestBase):
self, "Set break point at this line.", self.main_spec
)
- ns = self.namespace
+ u16string_name = self._makeStringName("::u16string", "char16_t")
+ u32string_name = self._makeStringName("::u32string", "char32_t")
+ custom_u16string_name = self._makeStringName(
+ "CustomStringU16", "char16_t", allocator="CustomAlloc"
+ )
+ custom_u32string_name = self._makeStringName(
+ "CustomStringU32", "char32_t", allocator="CustomAlloc"
+ )
self.expect(
"frame variable",
substrs=[
- '(%s::u16string) u16_string = u"ß水氶"' % ns,
- '(%s::u16string) u16_empty = u""' % ns,
- '(%s::u32string) u32_string = U"🍄🍅🍆🍌"' % ns,
- '(%s::u32string) u32_empty = U""' % ns,
- '(CustomStringU16) custom_u16 = u"ß水氶"',
- '(CustomStringU16) custom_u16_empty = u""',
- '(CustomStringU32) custom_u32 = U"🍄🍅🍆🍌"',
- '(CustomStringU32) custom_u32_empty = U""',
+ f'({u16string_name}) u16_string = u"ß水氶"',
+ f'({u16string_name}) u16_empty = u""',
+ f'({u32string_name}) u32_string = U"🍄🍅🍆🍌"',
+ f'({u32string_name}) u32_empty = U""',
+ f'({custom_u16string_name}) custom_u16 = u"ß水氶"',
+ f'({custom_u16string_name}) custom_u16_empty = u""',
+ f'({custom_u32string_name}) custom_u32 = U"🍄🍅🍆🍌"',
+ f'({custom_u32string_name}) custom_u32_empty = U""',
],
)
@@ -271,9 +295,8 @@ class StdStringDataFormatterTestCase(TestBase):
self.expect(
"frame variable",
substrs=[
- '(%s::string) IHaveEmbeddedZeros = "a\\0b\\0c\\0d"' % ns,
- '(%s::wstring) IHaveEmbeddedZerosToo = L"hello world!\\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監"'
- % ns,
+ f'({self._makeStringName("::string", "char")}) IHaveEmbeddedZeros = "a\\0b\\0c\\0d"',
+ f'({self._makeStringName("::wstring", "wchar_t")}) IHaveEmbeddedZerosToo = L"hello world!\\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監"',
],
)
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string_view/TestDataFormatterStdStringView.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string_view/TestDataFormatterStdStringView.py
index 181141886c5a..5c915b6d9f58 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string_view/TestDataFormatterStdStringView.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/string_view/TestDataFormatterStdStringView.py
@@ -11,6 +11,8 @@ from lldbsuite.test import lldbutil
class StdStringViewDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
# Call super's setUp().
TestBase.setUp(self)
@@ -20,6 +22,12 @@ class StdStringViewDataFormatterTestCase(TestBase):
"main.cpp", "// Break here to look at bad string view."
)
+ def _makeStringName(self, typedef: str, char_type: str):
+ if self.getDebugInfo() == "pdb":
+ return f"std::basic_string_view<{char_type}, std::char_traits<{char_type}>>"
+
+ return typedef
+
def do_test(self):
"""Test that that file and class static variables display correctly."""
self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET)
@@ -51,39 +59,47 @@ class StdStringViewDataFormatterTestCase(TestBase):
# Execute the cleanup function during test case tear down.
self.addTearDownHook(cleanup)
- self.expect_var_path("wempty", type="std::wstring_view", summary='L""')
+ string_view_name = self._makeStringName("std::string_view", "char")
+ wstring_view_name = self._makeStringName("std::wstring_view", "wchar_t")
+ u16string_view_name = self._makeStringName("std::u16string_view", "char16_t")
+ u32string_view_name = self._makeStringName("std::u32string_view", "char32_t")
+ string_name = (
+ "std::basic_string<char, std::char_traits<char>, std::allocator<char>>"
+ if self.getDebugInfo() == "pdb"
+ else "std::string"
+ )
+
+ self.expect_var_path("wempty", type=wstring_view_name, summary='L""')
self.expect_var_path(
- "s", type="std::wstring_view", summary='L"hello world! מזל טוב!"'
+ "s", type=wstring_view_name, summary='L"hello world! מזל טוב!"'
)
- self.expect_var_path("S", type="std::wstring_view", summary='L"!!!!"')
- self.expect_var_path("empty", type="std::string_view", summary='""')
- self.expect_var_path("q_source", type="std::string", summary='"hello world"')
- self.expect_var_path("q", type="std::string_view", summary='"hello world"')
+ self.expect_var_path("S", type=wstring_view_name, summary='L"!!!!"')
+ self.expect_var_path("empty", type=string_view_name, summary='""')
+ self.expect_var_path("q_source", type=string_name, summary='"hello world"')
+ self.expect_var_path("q", type=string_view_name, summary='"hello world"')
self.expect_var_path(
"Q",
- type="std::string_view",
+ type=string_view_name,
summary='"quite a long std::strin with lots of info inside it"',
)
self.expect_var_path(
- "IHaveEmbeddedZeros", type="std::string_view", summary='"a\\0b\\0c\\0d"'
+ "IHaveEmbeddedZeros", type=string_view_name, summary='"a\\0b\\0c\\0d"'
)
self.expect_var_path(
"IHaveEmbeddedZerosToo",
- type="std::wstring_view",
+ type=wstring_view_name,
summary='L"hello world!\\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監"',
)
- self.expect_var_path("u16_string", type="std::u16string_view", summary='u"ß水氶"')
- self.expect_var_path("u16_empty", type="std::u16string_view", summary='u""')
- self.expect_var_path(
- "u32_string", type="std::u32string_view", summary='U"🍄🍅🍆🍌"'
- )
- self.expect_var_path("u32_empty", type="std::u32string_view", summary='U""')
+ self.expect_var_path("u16_string", type=u16string_view_name, summary='u"ß水氶"')
+ self.expect_var_path("u16_empty", type=u16string_view_name, summary='u""')
+ self.expect_var_path("u32_string", type=u32string_view_name, summary='U"🍄🍅🍆🍌"')
+ self.expect_var_path("u32_empty", type=u32string_view_name, summary='U""')
# GetSummary returns None so can't be checked by expect_var_path, so we
# use the str representation instead
null_obj = self.frame().GetValueForVariablePath("null_str")
self.assertEqual(null_obj.GetSummary(), "Summary Unavailable")
- self.assertEqual(str(null_obj), "(std::string_view *) null_str = nullptr")
+ self.assertEqual(str(null_obj), f"({string_view_name} *) null_str = nullptr")
self.runCmd("n")
@@ -108,37 +124,35 @@ class StdStringViewDataFormatterTestCase(TestBase):
self.expect_expr(
"s",
- result_type="std::wstring_view",
+ result_type=wstring_view_name,
result_summary='L"hello world! מזל טוב!"',
)
- self.expect_var_path("wempty", type="std::wstring_view", summary='L""')
+ self.expect_var_path("wempty", type=wstring_view_name, summary='L""')
self.expect_var_path(
- "s", type="std::wstring_view", summary='L"hello world! מזל טוב!"'
+ "s", type=wstring_view_name, summary='L"hello world! מזל טוב!"'
)
- self.expect_var_path("S", type="std::wstring_view", summary='L"!!!!"')
- self.expect_var_path("empty", type="std::string_view", summary='""')
- self.expect_var_path("q_source", type="std::string", summary='"Hello world"')
- self.expect_var_path("q", type="std::string_view", summary='"Hello world"')
+ self.expect_var_path("S", type=wstring_view_name, summary='L"!!!!"')
+ self.expect_var_path("empty", type=string_view_name, summary='""')
+ self.expect_var_path("q_source", type=string_name, summary='"Hello world"')
+ self.expect_var_path("q", type=string_view_name, summary='"Hello world"')
self.expect_var_path(
"Q",
- type="std::string_view",
+ type=string_view_name,
summary='"quite a long std::strin with lots of info inside it"',
)
self.expect_var_path(
- "IHaveEmbeddedZeros", type="std::string_view", summary='"a\\0b\\0c\\0d"'
+ "IHaveEmbeddedZeros", type=string_view_name, summary='"a\\0b\\0c\\0d"'
)
self.expect_var_path(
"IHaveEmbeddedZerosToo",
- type="std::wstring_view",
+ type=wstring_view_name,
summary='L"hello world!\\0てざ ル゜䋨ミ㠧槊 きゅへ狦穤襩 じゃ馩リョ 䤦監"',
)
- self.expect_var_path("u16_string", type="std::u16string_view", summary='u"ß水氶"')
- self.expect_var_path("u16_empty", type="std::u16string_view", summary='u""')
- self.expect_var_path(
- "u32_string", type="std::u32string_view", summary='U"🍄🍅🍆🍌"'
- )
- self.expect_var_path("u32_empty", type="std::u32string_view", summary='U""')
+ self.expect_var_path("u16_string", type=u16string_view_name, summary='u"ß水氶"')
+ self.expect_var_path("u16_empty", type=u16string_view_name, summary='u""')
+ self.expect_var_path("u32_string", type=u32string_view_name, summary='U"🍄🍅🍆🍌"')
+ self.expect_var_path("u32_empty", type=u32string_view_name, summary='U""')
self.runCmd("cont")
self.expect(
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/tuple/TestDataFormatterStdTuple.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/tuple/TestDataFormatterStdTuple.py
index b23d549fe4c1..898438729ff8 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/tuple/TestDataFormatterStdTuple.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/tuple/TestDataFormatterStdTuple.py
@@ -9,6 +9,8 @@ from lldbsuite.test import lldbutil
class TestDataFormatterStdTuple(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
TestBase.setUp(self)
self.line = line_number("main.cpp", "// break here")
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string/TestDataFormatterStdU8String.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string/TestDataFormatterStdU8String.py
index b983ee175d38..dda97945f9b2 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string/TestDataFormatterStdU8String.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string/TestDataFormatterStdU8String.py
@@ -11,18 +11,26 @@ from lldbsuite.test import lldbutil
class StdU8StringDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def do_test(self):
lldbutil.run_to_source_breakpoint(
self, "Set break point at this line.", lldb.SBFileSpec("main.cpp")
)
+ string_name = (
+ "std::basic_string<char8_t, std::char_traits<char8_t>, std::allocator<char8_t>>"
+ if self.getDebugInfo() == "pdb"
+ else "std::u8string"
+ )
+
self.expect(
"frame variable",
substrs=[
- '(std::u8string) u8_string_small = u8"🍄"',
- '(std::u8string) u8_string = u8"❤️👍📄📁😃🧑‍🌾"',
- '(std::u8string) u8_empty = u8""',
- '(std::u8string) u8_text = u8"ABCd"',
+ f'({string_name}) u8_string_small = u8"🍄"',
+ f'({string_name}) u8_string = u8"❤️👍📄📁😃🧑‍🌾"',
+ f'({string_name}) u8_empty = u8""',
+ f'({string_name}) u8_text = u8"ABCd"',
],
)
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string_view/TestDataFormatterStdU8StringView.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string_view/TestDataFormatterStdU8StringView.py
index 1e35a0f6bb04..6cf72d18a864 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string_view/TestDataFormatterStdU8StringView.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/u8string_view/TestDataFormatterStdU8StringView.py
@@ -11,18 +11,26 @@ from lldbsuite.test import lldbutil
class StdU8StringViewDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def do_test(self):
lldbutil.run_to_source_breakpoint(
self, "Set break point at this line.", lldb.SBFileSpec("main.cpp")
)
+ string_view_name = (
+ "std::basic_string_view<char8_t, std::char_traits<char8_t>>"
+ if self.getDebugInfo() == "pdb"
+ else "std::u8string_view"
+ )
+
self.expect(
"frame variable",
substrs=[
- '(std::u8string_view) u8_string_small = u8"🍄"',
- '(std::u8string_view) u8_string = u8"❤️👍📄📁😃🧑‍🌾"',
- '(std::u8string_view) u8_empty = u8""',
- '(std::u8string_view) u8_text = u8"ABCd"',
+ f'({string_view_name}) u8_string_small = u8"🍄"',
+ f'({string_view_name}) u8_string = u8"❤️👍📄📁😃🧑‍🌾"',
+ f'({string_view_name}) u8_empty = u8""',
+ f'({string_view_name}) u8_text = u8"ABCd"',
],
)
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vbool/TestDataFormatterStdVBool.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vbool/TestDataFormatterStdVBool.py
index dd142d2be193..f74092ca3a0b 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vbool/TestDataFormatterStdVBool.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/vbool/TestDataFormatterStdVBool.py
@@ -9,6 +9,8 @@ from lldbsuite.test import lldbutil
class StdVBoolDataFormatterTestCase(TestBase):
+ TEST_WITH_PDB_DEBUG_INFO = True
+
def setUp(self):
# Call super's setUp().
TestBase.setUp(self)
diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestConnectRemoteDetach.py b/lldb/test/API/functionalities/gdb_remote_client/TestConnectRemoteDetach.py
new file mode 100644
index 000000000000..4380455efc45
--- /dev/null
+++ b/lldb/test/API/functionalities/gdb_remote_client/TestConnectRemoteDetach.py
@@ -0,0 +1,67 @@
+"""
+Test that ConnectRemote sets ShouldDetach flag correctly.
+
+When connecting to a remote process that stops after connection,
+the process should be marked for detach (not kill) on destruction.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test.gdbclientutils import *
+from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase
+from lldbsuite.test import lldbutil
+
+
+class TestConnectRemoteDetach(GDBRemoteTestBase):
+ """Test that ConnectRemote properly sets ShouldDetach flag."""
+
+ class StoppedResponder(MockGDBServerResponder):
+ """A responder that returns a stopped process."""
+
+ def qfThreadInfo(self):
+ return "m1"
+
+ def qsThreadInfo(self):
+ return "l"
+
+ def qC(self):
+ return "QC1"
+
+ def haltReason(self):
+ # Return that we're stopped
+ return "T05thread:1;"
+
+ def cont(self):
+ # Stay stopped
+ return "T05thread:1;"
+
+ def D(self):
+ # Detach packet: this is what we want to verify gets called.
+ return "OK"
+
+ def k(self):
+ # Kill packet: this is what we want to verify doesn't get called.
+ raise RuntimeError("should not receive k(ill) packet")
+
+ def test_connect_remote_sets_detach(self):
+ """Test that ConnectRemote to a stopped process sets ShouldDetach."""
+ self.server.responder = self.StoppedResponder()
+
+ target = self.createTarget("a.yaml")
+ process = self.connect(target)
+
+ # Wait for the process to be in stopped state after connecting.
+ # When ConnectRemote connects to a remote process that is stopped,
+ # it should call SetShouldDetach(true) before CompleteAttach().
+ lldbutil.expect_state_changes(
+ self, self.dbg.GetListener(), process, [lldb.eStateStopped]
+ )
+
+ # Now destroy the process. Because ShouldDetach was set to true
+ # during ConnectRemote, this should send a 'D' (detach) packet
+ # rather than a 'k' (kill) packet when the process is destroyed.
+ process.Destroy()
+
+ # Verify that the (D)etach packet was sent.
+ self.assertPacketLogReceived(["D"])
diff --git a/lldb/test/API/python_api/unified_section_list/Makefile b/lldb/test/API/python_api/unified_section_list/Makefile
new file mode 100644
index 000000000000..431e716ab8f6
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/Makefile
@@ -0,0 +1,5 @@
+CXX_SOURCES := main.cpp
+
+SPLIT_DEBUG_SYMBOLS := YES
+
+include Makefile.rules
diff --git a/lldb/test/API/python_api/unified_section_list/TestModuleUnifiedSectionList.py b/lldb/test/API/python_api/unified_section_list/TestModuleUnifiedSectionList.py
new file mode 100644
index 000000000000..93b23d0ba81c
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/TestModuleUnifiedSectionList.py
@@ -0,0 +1,285 @@
+"""
+Test Unified Section List merging.
+"""
+
+import os
+import shutil
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+from lldbsuite.test.lldbutil import symbol_type_to_str
+
+
+class ModuleUnifiedSectionList(TestBase):
+ @skipUnlessPlatform(["linux", "freebsd", "netbsd"])
+ def test_unified_section_list(self):
+ self.build()
+ exe = self.getBuildArtifact("a.out")
+ debug_info = self.getBuildArtifact("a.out.debug")
+ new_dir = os.path.join(os.path.dirname(debug_info), "new_dir")
+ os.mkdir(new_dir)
+ renamed_debug_info = os.path.join(new_dir, "renamed.debug")
+ os.rename(debug_info, renamed_debug_info)
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+ self.assertGreater(target.GetNumModules(), 0)
+
+ main_exe_module = target.GetModuleAtIndex(0)
+ eh_frame = main_exe_module.FindSection(".eh_frame")
+ self.assertTrue(eh_frame.IsValid())
+ self.assertGreater(eh_frame.size, 0)
+
+ # Should be stripped in main executable.
+ debug_info_section = main_exe_module.FindSection(".debug_info")
+ self.assertFalse(debug_info_section.IsValid())
+
+ ci = self.dbg.GetCommandInterpreter()
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand(f"target symbols add {renamed_debug_info}", res)
+ self.assertTrue(res.Succeeded())
+
+ # Should be stripped in .debuginfo but be present in main executable.
+ main_exe_module = target.GetModuleAtIndex(0)
+ eh_frame = main_exe_module.FindSection(".eh_frame")
+ self.assertTrue(eh_frame.IsValid())
+ self.assertGreater(eh_frame.size, 0)
+
+ # Should be unified and both sections should have contents.
+ debug_info_section = main_exe_module.FindSection(".debug_info")
+ self.assertTrue(debug_info_section.IsValid())
+ self.assertGreater(debug_info_section.file_size, 0)
+
+ def test_unified_section_list_overwrite_larger_section(self):
+ """
+ Test the merging of an ELF file with another ELF File where all the new sections are bigger, validating we
+ overwrite .comment from SHT_NOBITS to the new SHT_PROGBITS section and the smaller .text with the larger
+ .text
+ """
+ exe = self.getBuildArtifact("a.out")
+ self.yaml2obj("main.yaml", exe)
+
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+ main_exe_module = target.GetModuleAtIndex(0)
+
+ # First we verify out .text section is the expected BEC0FFEE
+ text_before_merge = main_exe_module.FindSection(".text")
+ self.assertTrue(text_before_merge.IsValid())
+ error = lldb.SBError()
+ section_content = text_before_merge.data.ReadRawData(
+ error, 0, text_before_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content, bytes.fromhex("BEC0FFEE"))
+
+ # .comment in main.yaml should be SHT_NOBITS, and size 0
+ comment_before_merge = main_exe_module.FindSection(".comment")
+ self.assertTrue(comment_before_merge.IsValid())
+ self.assertEqual(comment_before_merge.data.size, 0)
+
+ # yamlize the main.largertext.yaml and force symbol loading
+ debug_info = self.getBuildArtifact("a.out.debug")
+ self.yaml2obj("main.largertext.yaml", debug_info)
+
+ ci = self.dbg.GetCommandInterpreter()
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand(f"target symbols add {debug_info}", res)
+ self.assertTrue(res.Succeeded())
+
+ # verify we took the larger .text section
+ main_exe_module_after_merge = target.GetModuleAtIndex(0)
+ text_after_merge = main_exe_module_after_merge.FindSection(".text")
+ self.assertTrue(text_after_merge.IsValid())
+ self.assertGreater(text_after_merge.data.size, text_before_merge.data.size)
+ section_content_after_merge = text_after_merge.data.ReadRawData(
+ error, 0, text_after_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content_after_merge, bytes.fromhex("BEC0FFEEEEFF0CEB"))
+
+ # in main.largertext.yaml comment is not SHT_NOBITS, and so we should see
+ # the size > 0 and equal to BAADF00D
+ comment_after_merge = main_exe_module_after_merge.FindSection(".comment")
+ self.assertTrue(comment_after_merge.IsValid())
+ comment_content_after_merge = comment_after_merge.data.ReadRawData(
+ error, 0, comment_after_merge.data.size
+ )
+
+ self.assertTrue(error.Success())
+ self.assertEqual(comment_content_after_merge, bytes.fromhex("BAADF00D"))
+
+ def test_unified_section_list_overwrite_smaller_section(self):
+ """
+ Test the merging of an ELF file with another ELF File where all the existing sections are bigger, validating we don't
+ overwrite with the SHT_NOBITS for .comment or the smaller .text section.
+ """
+ exe = self.getBuildArtifact("a.out")
+ self.yaml2obj("main.largertext.yaml", exe)
+
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+ main_exe_module = target.GetModuleAtIndex(0)
+
+ # Same as above test but inverse, verify our larger .text section
+ # is the expected BEC0FFEE palindrome
+ text_before_merge = main_exe_module.FindSection(".text")
+ self.assertTrue(text_before_merge.IsValid())
+ error = lldb.SBError()
+ section_content = text_before_merge.data.ReadRawData(
+ error, 0, text_before_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content, bytes.fromhex("BEC0FFEEEEFF0CEB"))
+
+ # Comment is SHT_PROGBITS on the larger yaml and should remain
+ # the same after merge.
+ comment_before_merge = main_exe_module.FindSection(".comment")
+ self.assertTrue(comment_before_merge.IsValid())
+ comment_content = comment_before_merge.data.ReadRawData(
+ error, 0, comment_before_merge.data.size
+ )
+
+ self.assertTrue(error.Success())
+ self.assertEqual(comment_content, bytes.fromhex("BAADF00D"))
+
+ debug_info = self.getBuildArtifact("a.out.debug")
+ self.yaml2obj("main.yaml", debug_info)
+
+ ci = self.dbg.GetCommandInterpreter()
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand(f"target symbols add {debug_info}", res)
+ self.assertTrue(res.Succeeded())
+
+ # Verify we didn't replace the sections after merge.s
+ main_exe_module_after_merge = target.GetModuleAtIndex(0)
+ text_after_merge = main_exe_module_after_merge.FindSection(".text")
+ self.assertTrue(text_after_merge.IsValid())
+ self.assertEqual(text_after_merge.data.size, text_before_merge.data.size)
+ section_content_after_merge = text_after_merge.data.ReadRawData(
+ error, 0, text_after_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content_after_merge, bytes.fromhex("BEC0FFEEEEFF0CEB"))
+
+ comment_after_merge = main_exe_module_after_merge.FindSection(".comment")
+ self.assertTrue(comment_after_merge.IsValid())
+ comment_content_after_merge = comment_after_merge.data.ReadRawData(
+ error, 0, comment_after_merge.data.size
+ )
+
+ self.assertTrue(error.Success())
+ self.assertEqual(comment_content_after_merge, bytes.fromhex("BAADF00D"))
+
+ def test_unified_section_list_overwrite_mixed_merge(self):
+ """
+ Test the merging of an ELF file with another ELF File where the lhs has a larger .comment section
+ and the RHS has a larger .text section.
+ """
+ exe = self.getBuildArtifact("a.out")
+ self.yaml2obj("main.largercomment.yaml", exe)
+
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+ main_exe_module = target.GetModuleAtIndex(0)
+
+ # Verify we have the expected smaller BEC0FFEE
+ text_before_merge = main_exe_module.FindSection(".text")
+ self.assertTrue(text_before_merge.IsValid())
+ error = lldb.SBError()
+ section_content = text_before_merge.data.ReadRawData(
+ error, 0, text_before_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content, bytes.fromhex("BEC0FFEE"))
+
+ # Verify we have the larger palindromic comment
+ comment_before_merge = main_exe_module.FindSection(".comment")
+ self.assertTrue(comment_before_merge.IsValid())
+ comment_content = comment_before_merge.data.ReadRawData(
+ error, 0, comment_before_merge.data.size
+ )
+
+ self.assertTrue(error.Success())
+ self.assertEqual(comment_content, bytes.fromhex("BAADF00DF00DBAAD"))
+
+ debug_info = self.getBuildArtifact("a.out.debug")
+ self.yaml2obj("main.largertext.yaml", debug_info)
+
+ ci = self.dbg.GetCommandInterpreter()
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand(f"target symbols add {debug_info}", res)
+ self.assertTrue(res.Succeeded())
+
+ # Verify we replaced .text
+ main_exe_module_after_merge = target.GetModuleAtIndex(0)
+ text_after_merge = main_exe_module_after_merge.FindSection(".text")
+ self.assertTrue(text_after_merge.IsValid())
+ section_content_after_merge = text_after_merge.data.ReadRawData(
+ error, 0, text_after_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content_after_merge, bytes.fromhex("BEC0FFEEEEFF0CEB"))
+
+ # Verify .comment is still the same.
+ comment_after_merge = main_exe_module_after_merge.FindSection(".comment")
+ self.assertTrue(comment_after_merge.IsValid())
+ comment_content_after_merge = comment_after_merge.data.ReadRawData(
+ error, 0, comment_after_merge.data.size
+ )
+
+ self.assertTrue(error.Success())
+ self.assertEqual(comment_content_after_merge, bytes.fromhex("BAADF00DF00DBAAD"))
+
+ def test_unified_section_list_overwrite_equal_size(self):
+ """
+ Test the merging of an ELF file with an ELF file with sections of the same size with different values
+ .text
+ """
+ exe = self.getBuildArtifact("a.out")
+ self.yaml2obj("main.yaml", exe)
+
+ target = self.dbg.CreateTarget(exe)
+ self.assertTrue(target, VALID_TARGET)
+ main_exe_module = target.GetModuleAtIndex(0)
+
+ # First we verify out .text section is the expected BEC0FFEE
+ text_before_merge = main_exe_module.FindSection(".text")
+ self.assertTrue(text_before_merge.IsValid())
+ error = lldb.SBError()
+ section_content = text_before_merge.data.ReadRawData(
+ error, 0, text_before_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content, bytes.fromhex("BEC0FFEE"))
+
+ # .comment in main.yaml should be SHT_NOBITS, and size 0
+ comment_before_merge = main_exe_module.FindSection(".comment")
+ self.assertTrue(comment_before_merge.IsValid())
+ self.assertEqual(comment_before_merge.data.size, 0)
+
+ # yamlize the main with the .text reversed from BEC0FFEE
+ # to EEFF0CEB. We should still keep our .text with BEC0FFEE
+ debug_info = self.getBuildArtifact("a.out.debug")
+ self.yaml2obj("main.reversedtext.yaml", debug_info)
+
+ ci = self.dbg.GetCommandInterpreter()
+ res = lldb.SBCommandReturnObject()
+ ci.HandleCommand(f"target symbols add {debug_info}", res)
+ self.assertTrue(res.Succeeded())
+
+ # verify .text did not change
+ main_exe_module_after_merge = target.GetModuleAtIndex(0)
+ text_after_merge = main_exe_module_after_merge.FindSection(".text")
+ self.assertTrue(text_after_merge.IsValid())
+ section_content_after_merge = text_after_merge.data.ReadRawData(
+ error, 0, text_after_merge.data.size
+ )
+ self.assertTrue(error.Success())
+ self.assertEqual(section_content_after_merge, bytes.fromhex("BEC0FFEE"))
+
+ # verify comment did not change
+ comment_afer_merge = main_exe_module_after_merge.FindSection(".comment")
+ self.assertTrue(comment_afer_merge.IsValid())
+ self.assertEqual(comment_afer_merge.data.size, 0)
diff --git a/lldb/test/API/python_api/unified_section_list/main.cpp b/lldb/test/API/python_api/unified_section_list/main.cpp
new file mode 100644
index 000000000000..45fd52eeeb30
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/main.cpp
@@ -0,0 +1,3 @@
+#include <stdio.h>
+
+int main() { printf("Hello World\n"); }
diff --git a/lldb/test/API/python_api/unified_section_list/main.largercomment.yaml b/lldb/test/API/python_api/unified_section_list/main.largercomment.yaml
new file mode 100644
index 000000000000..f7860063e151
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/main.largercomment.yaml
@@ -0,0 +1,46 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1040
+ProgramHeaders:
+ - Type: PT_PHDR
+ Flags: [ PF_R ]
+ VAddr: 0x40
+ Align: 0x8
+ Offset: 0x40
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .text
+ LastSec: .fini
+ Align: 0x1000
+ Offset: 0x0
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1040
+ AddressAlign: 0x10
+ Content: BEC0FFEE
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1140
+ AddressAlign: 0x4
+ Content: DEADBEEF
+ - Name: .comment
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x3140
+ AddressAlign: 0x4
+ Content: BAADF00DF00DBAAD
+Symbols:
+ - Name: main
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x1130
+ Size: 0xF
+...
diff --git a/lldb/test/API/python_api/unified_section_list/main.largertext.yaml b/lldb/test/API/python_api/unified_section_list/main.largertext.yaml
new file mode 100644
index 000000000000..6450e6769db6
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/main.largertext.yaml
@@ -0,0 +1,46 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1040
+ProgramHeaders:
+ - Type: PT_PHDR
+ Flags: [ PF_R ]
+ VAddr: 0x40
+ Align: 0x8
+ Offset: 0x40
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .text
+ LastSec: .fini
+ Align: 0x1000
+ Offset: 0x0
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1040
+ AddressAlign: 0x10
+ Content: BEC0FFEEEEFF0CEB
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1140
+ AddressAlign: 0x4
+ Content: DEADBEEF
+ - Name: .comment
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x3140
+ AddressAlign: 0x4
+ Content: BAADF00D
+Symbols:
+ - Name: main
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x1130
+ Size: 0xF
+...
diff --git a/lldb/test/API/python_api/unified_section_list/main.reversedtext.yaml b/lldb/test/API/python_api/unified_section_list/main.reversedtext.yaml
new file mode 100644
index 000000000000..57206666046a
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/main.reversedtext.yaml
@@ -0,0 +1,45 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1040
+ProgramHeaders:
+ - Type: PT_PHDR
+ Flags: [ PF_R ]
+ VAddr: 0x40
+ Align: 0x8
+ Offset: 0x40
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .text
+ LastSec: .fini
+ Align: 0x1000
+ Offset: 0x0
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1040
+ AddressAlign: 0x10
+ Content: BEC0FFEE
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1140
+ AddressAlign: 0x4
+ Content: DEADBEEF
+ - Name: .comment
+ Type: SHT_NOBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x3140
+ AddressAlign: 0x4
+Symbols:
+ - Name: main
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x1130
+ Size: 0xF
+...
diff --git a/lldb/test/API/python_api/unified_section_list/main.yaml b/lldb/test/API/python_api/unified_section_list/main.yaml
new file mode 100644
index 000000000000..57206666046a
--- /dev/null
+++ b/lldb/test/API/python_api/unified_section_list/main.yaml
@@ -0,0 +1,45 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1040
+ProgramHeaders:
+ - Type: PT_PHDR
+ Flags: [ PF_R ]
+ VAddr: 0x40
+ Align: 0x8
+ Offset: 0x40
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .text
+ LastSec: .fini
+ Align: 0x1000
+ Offset: 0x0
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1040
+ AddressAlign: 0x10
+ Content: BEC0FFEE
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1140
+ AddressAlign: 0x4
+ Content: DEADBEEF
+ - Name: .comment
+ Type: SHT_NOBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x3140
+ AddressAlign: 0x4
+Symbols:
+ - Name: main
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x1130
+ Size: 0xF
+...
diff --git a/lldb/test/API/terminal/TestEditline.py b/lldb/test/API/terminal/TestEditline.py
index 38f4f34ed740..4696b1e1b112 100644
--- a/lldb/test/API/terminal/TestEditline.py
+++ b/lldb/test/API/terminal/TestEditline.py
@@ -94,7 +94,7 @@ class EditlineTest(PExpectTest):
# after the prompt.
self.child.send("foo")
# Check that there are no escape codes.
- self.child.expect(re.escape("\n(lldb) foo"))
+ self.child.expect(re.escape("\n\r\x1b[K(lldb) foo"))
@skipIfAsan
@skipIfEditlineSupportMissing
diff --git a/lldb/test/Shell/Commands/Inputs/sigchld.c b/lldb/test/Shell/Commands/Inputs/sigchld.c
index ba8c5ef45365..0121e70c1bdd 100644
--- a/lldb/test/Shell/Commands/Inputs/sigchld.c
+++ b/lldb/test/Shell/Commands/Inputs/sigchld.c
@@ -1,3 +1,7 @@
+#if defined(__linux__)
+#define _XOPEN_SOURCE 500 /* for CLD_EXITED */
+#endif
+
#include <assert.h>
#include <signal.h>
#include <stdio.h>
diff --git a/lldb/test/Shell/Commands/command-list-reach-beginning-of-file.test b/lldb/test/Shell/Commands/command-list-reach-beginning-of-file.test
index fa4a93e5904a..9987efedd802 100644
--- a/lldb/test/Shell/Commands/command-list-reach-beginning-of-file.test
+++ b/lldb/test/Shell/Commands/command-list-reach-beginning-of-file.test
@@ -4,7 +4,7 @@
# RUN: %lldb %t.out -b -s %s 2>&1 | FileCheck %s
list
-# CHECK: note: No source available
+# CHECK: note: No source available
b main
# CHECK: Breakpoint 1:
@@ -18,7 +18,7 @@ list
list -
# CHECK: int main()
-list -10
+list -13
# CHECK: #include <assert.h>
list -
diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
index 3afaaa2f64c0..8df3f29a7e82 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
+++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
@@ -2853,12 +2853,6 @@ pid_t MachProcess::AttachForDebug(
if (err.Success()) {
m_flags |= eMachProcessFlagsAttached;
- // Sleep a bit to let the exception get received and set our process
- // status
- // to stopped.
- ::usleep(250000);
- DNBLog("[LaunchAttach] (%d) Done napping after ptrace(PT_ATTACHEXC)'ing",
- getpid());
DNBLogThreadedIf(LOG_PROCESS, "successfully attached to pid %d", pid);
return m_pid;
} else {
diff --git a/lldb/tools/lldb-dap/Handler/ExceptionInfoRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/ExceptionInfoRequestHandler.cpp
index c1c2adb32a51..ddf55e6fb382 100644
--- a/lldb/tools/lldb-dap/Handler/ExceptionInfoRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/ExceptionInfoRequestHandler.cpp
@@ -7,168 +7,75 @@
//===----------------------------------------------------------------------===//
#include "DAP.h"
-#include "EventHelper.h"
-#include "JSONUtils.h"
+#include "DAPError.h"
+#include "Protocol/ProtocolRequests.h"
+#include "Protocol/ProtocolTypes.h"
#include "RequestHandler.h"
#include "lldb/API/SBStream.h"
+using namespace lldb_dap::protocol;
+
namespace lldb_dap {
-// "ExceptionInfoRequest": {
-// "allOf": [ { "$ref": "#/definitions/Request" }, {
-// "type": "object",
-// "description": "Retrieves the details of the exception that
-// caused this event to be raised. Clients should only call this request if
-// the corresponding capability `supportsExceptionInfoRequest` is true.",
-// "properties": {
-// "command": {
-// "type": "string",
-// "enum": [ "exceptionInfo" ]
-// },
-// "arguments": {
-// "$ref": "#/definitions/ExceptionInfoArguments"
-// }
-// },
-// "required": [ "command", "arguments" ]
-// }]
-// },
-// "ExceptionInfoArguments": {
-// "type": "object",
-// "description": "Arguments for `exceptionInfo` request.",
-// "properties": {
-// "threadId": {
-// "type": "integer",
-// "description": "Thread for which exception information should be
-// retrieved."
-// }
-// },
-// "required": [ "threadId" ]
-// },
-// "ExceptionInfoResponse": {
-// "allOf": [ { "$ref": "#/definitions/Response" }, {
-// "type": "object",
-// "description": "Response to `exceptionInfo` request.",
-// "properties": {
-// "body": {
-// "type": "object",
-// "properties": {
-// "exceptionId": {
-// "type": "string",
-// "description": "ID of the exception that was thrown."
-// },
-// "description": {
-// "type": "string",
-// "description": "Descriptive text for the exception."
-// },
-// "breakMode": {
-// "$ref": "#/definitions/ExceptionBreakMode",
-// "description": "Mode that caused the exception notification to
-// be raised."
-// },
-// "details": {
-// "$ref": "#/definitions/ExceptionDetails",
-// "description": "Detailed information about the exception."
-// }
-// },
-// "required": [ "exceptionId", "breakMode" ]
-// }
-// },
-// "required": [ "body" ]
-// }]
-// }
-// "ExceptionDetails": {
-// "type": "object",
-// "description": "Detailed information about an exception that has
-// occurred.", "properties": {
-// "message": {
-// "type": "string",
-// "description": "Message contained in the exception."
-// },
-// "typeName": {
-// "type": "string",
-// "description": "Short type name of the exception object."
-// },
-// "fullTypeName": {
-// "type": "string",
-// "description": "Fully-qualified type name of the exception object."
-// },
-// "evaluateName": {
-// "type": "string",
-// "description": "An expression that can be evaluated in the current
-// scope to obtain the exception object."
-// },
-// "stackTrace": {
-// "type": "string",
-// "description": "Stack trace at the time the exception was thrown."
-// },
-// "innerException": {
-// "type": "array",
-// "items": {
-// "$ref": "#/definitions/ExceptionDetails"
-// },
-// "description": "Details of the exception contained by this exception,
-// if any."
-// }
-// }
-// },
-void ExceptionInfoRequestHandler::operator()(
- const llvm::json::Object &request) const {
- llvm::json::Object response;
- FillResponse(request, response);
- const auto *arguments = request.getObject("arguments");
- llvm::json::Object body;
- lldb::SBThread thread = dap.GetLLDBThread(*arguments);
- if (thread.IsValid()) {
- auto stopReason = thread.GetStopReason();
- if (stopReason == lldb::eStopReasonSignal)
- body.try_emplace("exceptionId", "signal");
- else if (stopReason == lldb::eStopReasonBreakpoint) {
- ExceptionBreakpoint *exc_bp = dap.GetExceptionBPFromStopReason(thread);
- if (exc_bp) {
- EmplaceSafeString(body, "exceptionId", exc_bp->GetFilter());
- EmplaceSafeString(body, "description", exc_bp->GetLabel());
- } else {
- body.try_emplace("exceptionId", "exception");
- }
+/// Retrieves the details of the exception that caused this event to be raised.
+///
+/// Clients should only call this request if the corresponding capability
+/// `supportsExceptionInfoRequest` is true.
+llvm::Expected<ExceptionInfoResponseBody>
+ExceptionInfoRequestHandler::Run(const ExceptionInfoArguments &args) const {
+
+ lldb::SBThread thread = dap.GetLLDBThread(args.threadId);
+ if (!thread.IsValid())
+ return llvm::make_error<DAPError>(
+ llvm::formatv("Invalid thread id: {}", args.threadId).str());
+
+ ExceptionInfoResponseBody response;
+ response.breakMode = eExceptionBreakModeAlways;
+ const lldb::StopReason stop_reason = thread.GetStopReason();
+ switch (stop_reason) {
+ case lldb::eStopReasonSignal:
+ response.exceptionId = "signal";
+ break;
+ case lldb::eStopReasonBreakpoint: {
+ const ExceptionBreakpoint *exc_bp =
+ dap.GetExceptionBPFromStopReason(thread);
+ if (exc_bp) {
+ response.exceptionId = exc_bp->GetFilter();
+ response.description = exc_bp->GetLabel();
} else {
- body.try_emplace("exceptionId", "exception");
+ response.exceptionId = "exception";
}
- if (!ObjectContainsKey(body, "description")) {
- char description[1024];
- if (thread.GetStopDescription(description, sizeof(description))) {
- EmplaceSafeString(body, "description", description);
- }
+ } break;
+ default:
+ response.exceptionId = "exception";
+ }
+
+ lldb::SBStream stream;
+ if (response.description.empty()) {
+ if (thread.GetStopDescription(stream)) {
+ response.description = {stream.GetData(), stream.GetSize()};
}
- body.try_emplace("breakMode", "always");
- auto exception = thread.GetCurrentException();
- if (exception.IsValid()) {
- llvm::json::Object details;
- lldb::SBStream stream;
- if (exception.GetDescription(stream)) {
- EmplaceSafeString(details, "message", stream.GetData());
- }
+ }
- auto exceptionBacktrace = thread.GetCurrentExceptionBacktrace();
- if (exceptionBacktrace.IsValid()) {
- lldb::SBStream stream;
- exceptionBacktrace.GetDescription(stream);
- for (uint32_t i = 0; i < exceptionBacktrace.GetNumFrames(); i++) {
- lldb::SBFrame frame = exceptionBacktrace.GetFrameAtIndex(i);
- frame.GetDescription(stream);
- }
- EmplaceSafeString(details, "stackTrace", stream.GetData());
- }
+ if (lldb::SBValue exception = thread.GetCurrentException()) {
+ stream.Clear();
+ response.details = ExceptionDetails{};
+ if (exception.GetDescription(stream)) {
+ response.details->message = {stream.GetData(), stream.GetSize()};
+ }
+
+ if (lldb::SBThread exception_backtrace =
+ thread.GetCurrentExceptionBacktrace()) {
+ stream.Clear();
+ exception_backtrace.GetDescription(stream);
- body.try_emplace("details", std::move(details));
+ for (uint32_t idx = 0; idx < exception_backtrace.GetNumFrames(); idx++) {
+ lldb::SBFrame frame = exception_backtrace.GetFrameAtIndex(idx);
+ frame.GetDescription(stream);
+ }
+ response.details->stackTrace = {stream.GetData(), stream.GetSize()};
}
- // auto excInfoCount = thread.GetStopReasonDataCount();
- // for (auto i=0; i<excInfoCount; ++i) {
- // uint64_t exc_data = thread.GetStopReasonDataAtIndex(i);
- // }
- } else {
- response["success"] = llvm::json::Value(false);
}
- response.try_emplace("body", std::move(body));
- dap.SendJSON(llvm::json::Value(std::move(response)));
+ return response;
}
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h
index 977a24799675..bc22133d9245 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.h
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h
@@ -302,14 +302,18 @@ public:
}
};
-class ExceptionInfoRequestHandler : public LegacyRequestHandler {
+class ExceptionInfoRequestHandler final
+ : public RequestHandler<
+ protocol::ExceptionInfoArguments,
+ llvm::Expected<protocol::ExceptionInfoResponseBody>> {
public:
- using LegacyRequestHandler::LegacyRequestHandler;
+ using RequestHandler::RequestHandler;
static llvm::StringLiteral GetCommand() { return "exceptionInfo"; }
FeatureSet GetSupportedFeatures() const override {
return {protocol::eAdapterFeatureExceptionInfoRequest};
}
- void operator()(const llvm::json::Object &request) const override;
+ llvm::Expected<protocol::ExceptionInfoResponseBody>
+ Run(const protocol::ExceptionInfoArguments &args) const override;
};
class InitializeRequestHandler
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp
index b9393356b4e0..44ae79f8b9f4 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp
@@ -625,4 +625,22 @@ llvm::json::Value toJSON(const ModuleSymbolsResponseBody &DGMSR) {
return result;
}
+bool fromJSON(const json::Value &Params, ExceptionInfoArguments &Args,
+ json::Path Path) {
+ json::ObjectMapper O(Params, Path);
+ return O && O.map("threadId", Args.threadId);
+}
+
+json::Value toJSON(const ExceptionInfoResponseBody &ERB) {
+ json::Object result{{"exceptionId", ERB.exceptionId},
+ {"breakMode", ERB.breakMode}};
+
+ if (!ERB.description.empty())
+ result.insert({"description", ERB.description});
+ if (ERB.details.has_value())
+ result.insert({"details", *ERB.details});
+
+ return result;
+}
+
} // namespace lldb_dap::protocol
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index a85a68b87014..b894f2b4ed44 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -1039,6 +1039,28 @@ struct ModuleSymbolsResponseBody {
};
llvm::json::Value toJSON(const ModuleSymbolsResponseBody &);
+struct ExceptionInfoArguments {
+ /// Thread for which exception information should be retrieved.
+ lldb::tid_t threadId = LLDB_INVALID_THREAD_ID;
+};
+bool fromJSON(const llvm::json::Value &, ExceptionInfoArguments &,
+ llvm::json::Path);
+
+struct ExceptionInfoResponseBody {
+ /// ID of the exception that was thrown.
+ std::string exceptionId;
+
+ /// Descriptive text for the exception.
+ std::string description;
+
+ /// Mode that caused the exception notification to be raised.
+ ExceptionBreakMode breakMode = eExceptionBreakModeNever;
+
+ /// Detailed information about the exception.
+ std::optional<ExceptionDetails> details;
+};
+llvm::json::Value toJSON(const ExceptionInfoResponseBody &);
+
} // namespace lldb_dap::protocol
#endif
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp
index dc8edaadcd9b..95007013742a 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp
@@ -1136,4 +1136,37 @@ bool fromJSON(const json::Value &Param, Variable &V, json::Path Path) {
Path, /*required=*/false);
}
+json::Value toJSON(const ExceptionBreakMode Mode) {
+ switch (Mode) {
+ case eExceptionBreakModeNever:
+ return "never";
+ case eExceptionBreakModeAlways:
+ return "always";
+ case eExceptionBreakModeUnhandled:
+ return "unhandled";
+ case eExceptionBreakModeUserUnhandled:
+ return "userUnhandled";
+ }
+ llvm_unreachable("unhandled exception breakMode.");
+}
+
+json::Value toJSON(const ExceptionDetails &ED) {
+ json::Object result;
+
+ if (!ED.message.empty())
+ result.insert({"message", ED.message});
+ if (!ED.typeName.empty())
+ result.insert({"typeName", ED.typeName});
+ if (!ED.fullTypeName.empty())
+ result.insert({"fullTypeName", ED.fullTypeName});
+ if (!ED.evaluateName.empty())
+ result.insert({"evaluateName", ED.evaluateName});
+ if (!ED.stackTrace.empty())
+ result.insert({"stackTrace", ED.stackTrace});
+ if (!ED.innerException.empty())
+ result.insert({"innerException", ED.innerException});
+
+ return result;
+}
+
} // namespace lldb_dap::protocol
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
index 7077df90a85b..6d85c74377bd 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
@@ -1007,6 +1007,36 @@ struct Variable {
llvm::json::Value toJSON(const Variable &);
bool fromJSON(const llvm::json::Value &, Variable &, llvm::json::Path);
+enum ExceptionBreakMode : unsigned {
+ eExceptionBreakModeNever,
+ eExceptionBreakModeAlways,
+ eExceptionBreakModeUnhandled,
+ eExceptionBreakModeUserUnhandled,
+};
+llvm::json::Value toJSON(ExceptionBreakMode);
+
+struct ExceptionDetails {
+ /// Message contained in the exception.
+ std::string message;
+
+ /// Short type name of the exception object.
+ std::string typeName;
+
+ /// Fully-qualified type name of the exception object.
+ std::string fullTypeName;
+
+ /// An expression that can be evaluated in the current scope to obtain the
+ /// exception object.
+ std::string evaluateName;
+
+ /// Stack trace at the time the exception was thrown.
+ std::string stackTrace;
+
+ /// Details of the exception contained by this exception, if any.
+ std::vector<ExceptionDetails> innerException;
+};
+llvm::json::Value toJSON(const ExceptionDetails &);
+
} // namespace lldb_dap::protocol
#endif
diff --git a/lldb/unittests/Core/CMakeLists.txt b/lldb/unittests/Core/CMakeLists.txt
index 6e609a63ad9b..f0c9a9a9d505 100644
--- a/lldb/unittests/Core/CMakeLists.txt
+++ b/lldb/unittests/Core/CMakeLists.txt
@@ -7,6 +7,7 @@ add_lldb_unittest(LLDBCoreTests
DumpRegisterInfoTest.cpp
FormatEntityTest.cpp
MangledTest.cpp
+ ModuleListTest.cpp
ModuleSpecTest.cpp
PluginManagerTest.cpp
ProgressReportTest.cpp
diff --git a/lldb/unittests/Core/MangledTest.cpp b/lldb/unittests/Core/MangledTest.cpp
index cbc0c5d951b9..706e67801e01 100644
--- a/lldb/unittests/Core/MangledTest.cpp
+++ b/lldb/unittests/Core/MangledTest.cpp
@@ -636,6 +636,16 @@ DemanglingPartsTestCase g_demangling_parts_test_cases[] = {
/*.basename=*/"operator()",
/*.scope=*/"dyld4::Loader::runInitializersBottomUpPlusUpwardLinks(dyld4::RuntimeState&) const::$_0::",
/*.qualifiers=*/" const",
+ },
+ {"_Z4funcILN3foo4EnumE1EEvv",
+ {
+ /*.BasenameRange=*/{5, 9}, /*.TemplateArgumentsRange=*/{9, 23}, /*.ScopeRange=*/{5, 5},
+ /*.ArgumentsRange=*/{23, 25}, /*.QualifiersRange=*/{25, 25}, /*.NameQualifiersRange=*/{0, 0},
+ /*.PrefixRange=*/{0, 0}, /*.SuffixRange=*/{0, 0}
+ },
+ /*.basename=*/"func",
+ /*.scope=*/"",
+ /*.qualifiers=*/"",
}
// clang-format on
};
diff --git a/lldb/unittests/Core/ModuleListTest.cpp b/lldb/unittests/Core/ModuleListTest.cpp
new file mode 100644
index 000000000000..3c70b0a4b21b
--- /dev/null
+++ b/lldb/unittests/Core/ModuleListTest.cpp
@@ -0,0 +1,178 @@
+//===-- ModuleListTest.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Core/ModuleList.h"
+#include "TestingSupport/SubsystemRAII.h"
+#include "TestingSupport/TestUtilities.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleSpec.h"
+#include "lldb/Host/FileSystem.h"
+#include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/UUID.h"
+
+#include "Plugins/ObjectFile/ELF/ObjectFileELF.h"
+
+#include "gtest/gtest.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+// Test that when we already have a module in the shared_module_list with a
+// specific UUID, the next call to GetSharedModule with a module_spec with the
+// same UUID should return the existing module instead of creating a new one.
+TEST(ModuleListTest, GetSharedModuleReusesExistingModuleWithSameUUID) {
+ SubsystemRAII<FileSystem, ObjectFileELF> subsystems;
+
+ auto ExpectedFile = TestFile::fromYaml(R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x0000000000000010
+...
+)");
+ ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded());
+
+ // First, let's verify that calling GetSharedModule twice with the same
+ // module_spec returns the same module pointer
+
+ ModuleSP first_module;
+ bool first_did_create = false;
+ Status error_first =
+ ModuleList::GetSharedModule(ExpectedFile->moduleSpec(), first_module,
+ nullptr, &first_did_create, false);
+
+ // Second call with the same spec
+ ModuleSP second_module;
+ bool second_did_create = false;
+ Status error_second =
+ ModuleList::GetSharedModule(ExpectedFile->moduleSpec(), second_module,
+ nullptr, &second_did_create, false);
+
+ if (error_first.Success() && error_second.Success()) {
+ // If both succeeded, verify they're the same module
+ EXPECT_EQ(first_module.get(), second_module.get())
+ << "GetSharedModule should return the same module for the same spec";
+ EXPECT_TRUE(first_did_create) << "First call should create the module";
+ EXPECT_FALSE(second_did_create)
+ << "Second call should reuse the existing module";
+ }
+}
+
+// Test that UUID-based lookup finds existing modules
+TEST(ModuleListTest, FindSharedModuleByUUID) {
+ SubsystemRAII<FileSystem, ObjectFileELF> subsystems;
+
+ auto ExpectedFile = TestFile::fromYaml(R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x0000000000000010
+...
+)");
+ ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded());
+
+ // Create and add a module to the shared module list using the moduleSpec()
+ ModuleSP created_module;
+ bool did_create = false;
+ Status error = ModuleList::GetSharedModule(
+ ExpectedFile->moduleSpec(), created_module, nullptr, &did_create, false);
+
+ if (error.Success() && created_module) {
+ // Get the UUID of the created module
+ UUID module_uuid = created_module->GetUUID();
+
+ if (module_uuid.IsValid()) {
+ // Now try to find the module by UUID
+ ModuleSP found_module = ModuleList::FindSharedModule(module_uuid);
+
+ ASSERT_NE(found_module.get(), nullptr)
+ << "FindSharedModule should find the module by UUID";
+ EXPECT_EQ(found_module.get(), created_module.get())
+ << "FindSharedModule should return the same module instance";
+ EXPECT_EQ(found_module->GetUUID(), module_uuid)
+ << "Found module should have the same UUID";
+ }
+ }
+}
+
+// Test that GetSharedModule with UUID finds existing module even with different
+// path
+TEST(ModuleListTest, GetSharedModuleByUUIDIgnoresPath) {
+ SubsystemRAII<FileSystem, ObjectFileELF> subsystems;
+
+ auto ExpectedFile = TestFile::fromYaml(R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x0000000000000010
+...
+)");
+ ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded());
+
+ // Create and add a module to the shared module list
+ ModuleSP first_module;
+ bool first_did_create = false;
+ Status first_error =
+ ModuleList::GetSharedModule(ExpectedFile->moduleSpec(), first_module,
+ nullptr, &first_did_create, false);
+
+ if (first_error.Success() && first_module) {
+ UUID module_uuid = first_module->GetUUID();
+
+ if (module_uuid.IsValid()) {
+ // Now try to get a module with the same UUID but different path
+ ModuleSpec second_spec;
+ second_spec.GetFileSpec() = FileSpec("/different/path/to/module.so");
+ second_spec.GetArchitecture() = ArchSpec("x86_64-pc-linux");
+ second_spec.GetUUID() = module_uuid;
+
+ ModuleSP second_module;
+ bool second_did_create = false;
+ Status second_error = ModuleList::GetSharedModule(
+ second_spec, second_module, nullptr, &second_did_create, false);
+
+ if (second_error.Success() && second_module) {
+ // If we got a module back, check if it's the same one
+ bool is_same_module = (second_module.get() == first_module.get());
+
+ // Document the behavior: ideally UUID should take precedence
+ // and return the existing module
+ EXPECT_TRUE(is_same_module)
+ << "GetSharedModule with matching UUID should return existing "
+ "module, "
+ << "even with different path (per PR #160199)";
+
+ if (is_same_module) {
+ EXPECT_FALSE(second_did_create)
+ << "Should not create a new module when UUID matches";
+ }
+ }
+ }
+ }
+}
diff --git a/lldb/unittests/DAP/CMakeLists.txt b/lldb/unittests/DAP/CMakeLists.txt
index b1fdef18fddb..a478cf07eedb 100644
--- a/lldb/unittests/DAP/CMakeLists.txt
+++ b/lldb/unittests/DAP/CMakeLists.txt
@@ -8,6 +8,7 @@ add_lldb_unittest(DAPTests
Handler/ContinueTest.cpp
JSONUtilsTest.cpp
LLDBUtilsTest.cpp
+ ProtocolRequestsTest.cpp
ProtocolTypesTest.cpp
ProtocolUtilsTest.cpp
TestBase.cpp
diff --git a/lldb/unittests/DAP/ProtocolRequestsTest.cpp b/lldb/unittests/DAP/ProtocolRequestsTest.cpp
new file mode 100644
index 000000000000..498195dc0932
--- /dev/null
+++ b/lldb/unittests/DAP/ProtocolRequestsTest.cpp
@@ -0,0 +1,69 @@
+//===-- ProtocolRequestsTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Protocol/ProtocolRequests.h"
+#include "Protocol/ProtocolTypes.h"
+#include "TestingSupport/TestUtilities.h"
+#include "llvm/Testing/Support/Error.h"
+#include <gtest/gtest.h>
+
+using namespace llvm;
+using namespace lldb_dap::protocol;
+using lldb_private::PrettyPrint;
+using llvm::json::parse;
+
+TEST(ProtocolRequestsTest, ExceptionInfoArguments) {
+ llvm::Expected<ExceptionInfoArguments> expected =
+ parse<ExceptionInfoArguments>(R"({
+ "threadId": 3434
+ })");
+ ASSERT_THAT_EXPECTED(expected, llvm::Succeeded());
+ EXPECT_EQ(expected->threadId, 3434U);
+
+ // Check required keys;
+ EXPECT_THAT_EXPECTED(parse<ExceptionInfoArguments>(R"({})"),
+ FailedWithMessage("missing value at (root).threadId"));
+
+ EXPECT_THAT_EXPECTED(parse<ExceptionInfoArguments>(R"({"id": 10})"),
+ FailedWithMessage("missing value at (root).threadId"));
+}
+
+TEST(ProtocolRequestsTest, ExceptionInfoResponseBody) {
+ ExceptionInfoResponseBody body;
+ body.exceptionId = "signal";
+ body.breakMode = eExceptionBreakModeAlways;
+
+ // Check required keys.
+ Expected<json::Value> expected = parse(
+ R"({
+ "exceptionId": "signal",
+ "breakMode": "always"
+ })");
+
+ ASSERT_THAT_EXPECTED(expected, llvm::Succeeded());
+ EXPECT_EQ(PrettyPrint(*expected), PrettyPrint(body));
+
+ // Check optional keys.
+ body.description = "SIGNAL SIGWINCH";
+ body.breakMode = eExceptionBreakModeNever;
+ body.details = ExceptionDetails{};
+ body.details->message = "some message";
+
+ Expected<json::Value> expected_opt = parse(
+ R"({
+ "exceptionId": "signal",
+ "description": "SIGNAL SIGWINCH",
+ "breakMode": "never",
+ "details": {
+ "message": "some message"
+ }
+ })");
+
+ ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded());
+ EXPECT_EQ(PrettyPrint(*expected_opt), PrettyPrint(body));
+}
diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp
index 8170abdd25bc..6a4620a3f1e5 100644
--- a/lldb/unittests/DAP/ProtocolTypesTest.cpp
+++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp
@@ -1129,3 +1129,50 @@ TEST(ProtocolTypesTest, DataBreakpointInfoArguments) {
EXPECT_THAT_EXPECTED(parse<DataBreakpointInfoArguments>(R"({"name":"data"})"),
llvm::Succeeded());
}
+
+TEST(ProtocolTypesTest, ExceptionBreakMode) {
+ const std::vector<std::pair<ExceptionBreakMode, llvm::StringRef>> test_cases =
+ {{ExceptionBreakMode::eExceptionBreakModeAlways, "always"},
+ {ExceptionBreakMode::eExceptionBreakModeNever, "never"},
+ {ExceptionBreakMode::eExceptionBreakModeUnhandled, "unhandled"},
+ {ExceptionBreakMode::eExceptionBreakModeUserUnhandled, "userUnhandled"}};
+
+ for (const auto [value, expected] : test_cases) {
+ json::Value const serialized = toJSON(value);
+ ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String);
+ EXPECT_EQ(serialized.getAsString(), expected);
+ }
+}
+
+TEST(ProtocolTypesTest, ExceptionDetails) {
+ ExceptionDetails details;
+
+ // Check required keys.
+ Expected<json::Value> expected = parse(R"({})");
+ ASSERT_THAT_EXPECTED(expected, llvm::Succeeded());
+ EXPECT_EQ(pp(*expected), pp(details));
+
+ // Check optional keys.
+ details.message = "SIGABRT exception";
+ details.typeName = "signal";
+ details.fullTypeName = "SIGABRT";
+ details.evaluateName = "process handle SIGABRT";
+ details.stackTrace = "some stacktrace";
+ ExceptionDetails inner_details;
+ inner_details.message = "inner message";
+ details.innerException = {std::move(inner_details)};
+
+ Expected<json::Value> expected_opt = parse(R"({
+ "message": "SIGABRT exception",
+ "typeName": "signal",
+ "fullTypeName": "SIGABRT",
+ "evaluateName": "process handle SIGABRT",
+ "stackTrace": "some stacktrace",
+ "innerException": [{
+ "message": "inner message"
+ }]
+ })");
+
+ ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded());
+ EXPECT_EQ(pp(*expected_opt), pp(details));
+}
diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
index 3d0e2d8a6248..a63b740d9472 100644
--- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
+++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
@@ -161,6 +161,11 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBExecutionContext(
return nullptr;
}
+void *
+lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrameList(PyObject *data) {
+ return nullptr;
+}
+
lldb::ValueObjectSP
lldb_private::python::SWIGBridge::LLDBSWIGPython_GetValueObjectSPFromSBValue(
void *data) {
@@ -329,6 +334,11 @@ lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ProcessSP) {
return python::PythonObject();
}
+python::PythonObject
+lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::StackFrameListSP) {
+ return python::PythonObject();
+}
+
python::PythonObject lldb_private::python::SWIGBridge::ToSWIGWrapper(
const lldb_private::StructuredDataImpl &) {
return python::PythonObject();
diff --git a/lldb/unittests/Target/LocateModuleCallbackTest.cpp b/lldb/unittests/Target/LocateModuleCallbackTest.cpp
index 6ffa41b16b4f..d727cea9f6ea 100644
--- a/lldb/unittests/Target/LocateModuleCallbackTest.cpp
+++ b/lldb/unittests/Target/LocateModuleCallbackTest.cpp
@@ -362,7 +362,7 @@ TEST_F(LocateModuleCallbackTest, GetOrCreateModuleCallbackFailureNoCache) {
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
ASSERT_FALSE(m_module_sp);
}
@@ -383,7 +383,7 @@ TEST_F(LocateModuleCallbackTest, GetOrCreateModuleCallbackFailureCached) {
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_FALSE(m_module_sp->GetSymbolFileFileSpec());
@@ -409,7 +409,7 @@ TEST_F(LocateModuleCallbackTest, GetOrCreateModuleCallbackNoFiles) {
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_FALSE(m_module_sp->GetSymbolFileFileSpec());
@@ -435,7 +435,7 @@ TEST_F(LocateModuleCallbackTest, GetOrCreateModuleCallbackNonExistentModule) {
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_FALSE(m_module_sp->GetSymbolFileFileSpec());
@@ -464,7 +464,7 @@ TEST_F(LocateModuleCallbackTest, GetOrCreateModuleCallbackNonExistentSymbol) {
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_TRUE(m_module_sp->GetSymbolFileFileSpec().GetPath().empty());
@@ -622,7 +622,7 @@ TEST_F(LocateModuleCallbackTest,
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_EQ(m_module_sp->GetSymbolFileFileSpec(),
@@ -650,7 +650,7 @@ TEST_F(LocateModuleCallbackTest,
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_EQ(m_module_sp->GetSymbolFileFileSpec(),
@@ -682,7 +682,7 @@ TEST_F(LocateModuleCallbackTest,
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
CheckModule(m_module_sp);
ASSERT_EQ(m_module_sp->GetFileSpec(), uuid_view);
ASSERT_EQ(m_module_sp->GetSymbolFileFileSpec(),
@@ -709,7 +709,7 @@ TEST_F(LocateModuleCallbackTest,
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
ASSERT_FALSE(m_module_sp);
}
@@ -731,7 +731,7 @@ TEST_F(LocateModuleCallbackTest,
});
m_module_sp = m_target_sp->GetOrCreateModule(m_module_spec, /*notify=*/false);
- ASSERT_EQ(callback_call_count, 2);
+ ASSERT_EQ(callback_call_count, 3);
ASSERT_FALSE(m_module_sp);
}
diff --git a/lldb/unittests/Target/RemoteAwarePlatformTest.cpp b/lldb/unittests/Target/RemoteAwarePlatformTest.cpp
index 3278674ed0a0..cfcec693b874 100644
--- a/lldb/unittests/Target/RemoteAwarePlatformTest.cpp
+++ b/lldb/unittests/Target/RemoteAwarePlatformTest.cpp
@@ -32,15 +32,12 @@ public:
ProcessSP(ProcessAttachInfo &, Debugger &, Target *, Status &));
MOCK_METHOD0(CalculateTrapHandlerSymbolNames, void());
- MOCK_METHOD2(ResolveExecutable,
- std::pair<bool, ModuleSP>(const ModuleSpec &,
- const FileSpecList *));
- Status
- ResolveExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr) /*override*/
+ MOCK_METHOD1(ResolveExecutable,
+ std::pair<bool, ModuleSP>(const ModuleSpec &));
+ Status ResolveExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp) /*override*/
{ // NOLINT(modernize-use-override)
- auto pair = ResolveExecutable(module_spec, module_search_paths_ptr);
+ auto pair = ResolveExecutable(module_spec);
exe_module_sp = pair.second;
return pair.first ? Status() : Status::FromErrorString("error");
}
@@ -80,14 +77,14 @@ TEST_F(RemoteAwarePlatformTest, TestResolveExecutabelOnClientByPlatform) {
static const ArchSpec process_host_arch;
EXPECT_CALL(platform, GetSupportedArchitectures(process_host_arch))
.WillRepeatedly(Return(std::vector<ArchSpec>()));
- EXPECT_CALL(platform, ResolveExecutable(_, _))
+ EXPECT_CALL(platform, ResolveExecutable(_))
.WillRepeatedly(Return(std::make_pair(true, expected_executable)));
platform.SetRemotePlatform(std::make_shared<TargetPlatformTester>(false));
ModuleSP resolved_sp;
lldb_private::Status status =
- platform.ResolveExecutable(executable_spec, resolved_sp, nullptr);
+ platform.ResolveExecutable(executable_spec, resolved_sp);
ASSERT_TRUE(status.Success());
EXPECT_EQ(expected_executable.get(), resolved_sp.get());
diff --git a/lldb/unittests/TestingSupport/TestUtilities.cpp b/lldb/unittests/TestingSupport/TestUtilities.cpp
index b53822e38324..d164c227afb9 100644
--- a/lldb/unittests/TestingSupport/TestUtilities.cpp
+++ b/lldb/unittests/TestingSupport/TestUtilities.cpp
@@ -20,6 +20,11 @@ using namespace lldb_private;
extern const char *TestMainArgv0;
std::once_flag TestUtilities::g_debugger_initialize_flag;
+
+std::string lldb_private::PrettyPrint(const llvm::json::Value &value) {
+ return llvm::formatv("{0:2}", value).str();
+}
+
std::string lldb_private::GetInputFilePath(const llvm::Twine &name) {
llvm::SmallString<128> result = llvm::sys::path::parent_path(TestMainArgv0);
llvm::sys::fs::make_absolute(result);
diff --git a/lldb/unittests/TestingSupport/TestUtilities.h b/lldb/unittests/TestingSupport/TestUtilities.h
index cc93a68a6a43..f05d176618fa 100644
--- a/lldb/unittests/TestingSupport/TestUtilities.h
+++ b/lldb/unittests/TestingSupport/TestUtilities.h
@@ -30,6 +30,10 @@
}
namespace lldb_private {
+
+/// Returns a pretty printed json string of a `llvm::json::Value`.
+std::string PrettyPrint(const llvm::json::Value &E);
+
std::string GetInputFilePath(const llvm::Twine &name);
class TestUtilities {
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 30b22a4a6d60..30b24b2f6f03 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1180,6 +1180,51 @@ is conservatively correct for OpenCL.
other operations within the same address space.
======================= ===================================================
+Target Types
+------------
+
+The AMDGPU backend implements some target extension types.
+
+.. _amdgpu-types-named-barriers:
+
+Named Barriers
+~~~~~~~~~~~~~~
+
+Named barriers are fixed function hardware barrier objects that are available
+in gfx12.5+ in addition to the traditional default barriers.
+
+In LLVM IR, named barriers are represented by global variables of type
+``target("amdgcn.named.barrier", 0)`` in the LDS address space. Named barrier
+global variables do not occupy actual LDS memory, but their lifetime and
+allocation scope matches that of global variables in LDS. Programs in LLVM IR
+refer to named barriers using pointers.
+
+The following named barrier types are supported in global variables, defined
+recursively:
+
+* a single, standalone ``target("amdgcn.named.barrier", 0)``
+* an array of supported types
+* a struct containing a single element of supported type
+
+.. code-block:: llvm
+
+ @bar = addrspace(3) global target("amdgcn.named.barrier", 0) undef
+ @foo = addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] undef
+ @baz = addrspace(3) global { target("amdgcn.named.barrier", 0) } undef
+
+ ...
+
+ %foo.i = getelementptr [2 x target("amdgcn.named.barrier", 0)], ptr addrspace(3) @foo, i32 0, i32 %i
+ call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %foo.i, i32 0)
+
+Named barrier types may not be used in ``alloca``.
+
+Named barriers do not have an underlying byte representation.
+It is undefined behavior to use a pointer to any part of a named barrier object
+as the pointer operand of a regular memory access instruction or intrinsic.
+Pointers to named barrier objects are intended to be used with dedicated
+intrinsics. Reading from or writing to such pointers is undefined behavior.
+
LLVM IR Intrinsics
------------------
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b9507a2d054f..ab085ca0b149 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -20368,6 +20368,77 @@ Arguments:
""""""""""
The argument to this intrinsic must be a vector of floating-point values.
+Vector Partial Reduction Intrinsics
+-----------------------------------
+
+Partial reductions of vectors can be expressed using the intrinsics described in
+this section. Each one reduces the concatenation of the two vector arguments
+down to the number of elements of the result vector type.
+
+Other than the reduction operator (e.g. add, fadd), the way in which the
+concatenated arguments is reduced is entirely unspecified. By their nature these
+intrinsics are not expected to be useful in isolation but can instead be used to
+implement the first phase of an overall reduction operation.
+
+The typical use case is loop vectorization where reductions are split into an
+in-loop phase, where maintaining an unordered vector result is important for
+performance, and an out-of-loop phase is required to calculate the final scalar
+result.
+
+By avoiding the introduction of new ordering constraints, these intrinsics
+enhance the ability to leverage a target's accumulation instructions.
+
+'``llvm.vector.partial.reduce.add.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %a, <8 x i32> %b)
+ declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> %a, <16 x i32> %b)
+ declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32> %a, <vscale x 8 x i32> %b)
+ declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32> %a, <vscale x 16 x i32> %b)
+
+Arguments:
+""""""""""
+
+The first argument is an integer vector with the same type as the result.
+
+The second argument is a vector with a length that is a known integer multiple
+of the result's type, while maintaining the same element type.
+
+'``llvm.vector.partial.reduce.fadd.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <4 x f32> @llvm.vector.partial.reduce.fadd.v4f32.v8f32(<4 x f32> %a, <8 x f32> %b)
+ declare <vscale x 4 x f32> @llvm.vector.partial.reduce.fadd.nxv4f32.nxv8f32(<vscale x 4 x f32> %a, <vscale x 8 x f32> %b)
+
+Arguments:
+""""""""""
+
+The first argument is a floating-point vector with the same type as the result.
+
+The second argument is a vector with a length that is a known integer multiple
+of the result's type, while maintaining the same element type.
+
+Semantics:
+""""""""""
+
+As the way in which the arguments to this floating-point intrinsic are reduced
+is unspecified, this intrinsic will assume floating-point reassociation and
+contraction can be leveraged to implement the reduction, which may result in
+variations to the results due to reordering or by lowering to different
+instructions (including combining multiple instructions into a single one).
+
'``llvm.vector.insert``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -20741,50 +20812,6 @@ Note that it has the following implications:
- If ``%cnt`` is non-zero, the return value is non-zero as well.
- If ``%cnt`` is less than or equal to ``%max_lanes``, the return value is equal to ``%cnt``.
-'``llvm.vector.partial.reduce.add.*``' Intrinsic
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-This is an overloaded intrinsic.
-
-::
-
- declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %a, <8 x i32> %b)
- declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> %a, <16 x i32> %b)
- declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32> %a, <vscale x 8 x i32> %b)
- declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32> %a, <vscale x 16 x i32> %b)
-
-Overview:
-"""""""""
-
-The '``llvm.vector.partial.reduce.add.*``' intrinsics reduce the
-concatenation of the two vector arguments down to the number of elements of the
-result vector type.
-
-Arguments:
-""""""""""
-
-The first argument is an integer vector with the same type as the result.
-
-The second argument is a vector with a length that is a known integer multiple
-of the result's type, while maintaining the same element type.
-
-Semantics:
-""""""""""
-
-Other than the reduction operator (e.g., add) the way in which the concatenated
-arguments is reduced is entirely unspecified. By their nature these intrinsics
-are not expected to be useful in isolation but instead implement the first phase
-of an overall reduction operation.
-
-The typical use case is loop vectorization where reductions are split into an
-in-loop phase, where maintaining an unordered vector result is important for
-performance, and an out-of-loop phase to calculate the final scalar result.
-
-By avoiding the introduction of new ordering constraints, these intrinsics
-enhance the ability to leverage a target's accumulation instructions.
-
'``llvm.experimental.vector.histogram.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -30968,6 +30995,37 @@ This intrinsic does nothing, but optimizers must consider it a use of its single
operand and should try to preserve the intrinsic and its position in the
function.
+.. _llvm_reloc_none:
+
+'``llvm.reloc.none``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.reloc.none(metadata !<name_str>)
+
+Overview:
+"""""""""
+
+The ``llvm.reloc.none`` intrinsic emits a no-op relocation against a given
+operand symbol. This can bring the symbol definition into the link without
+emitting any code or data to the binary for that purpose.
+
+Arguments:
+""""""""""
+
+The ``llvm.reloc.none`` intrinsic takes the symbol as a metadata string
+argument.
+
+Semantics:
+""""""""""
+
+This intrinsic emits a no-op relocation for the symbol at the location of the
+intrinsic call.
+
Stack Map Intrinsics
--------------------
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index bfe68274eae3..23bba99ec874 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -67,6 +67,9 @@ Changes to the LLVM IR
Instead, the `align` attribute should be placed on the pointer (or vector of
pointers) argument.
* A `load atomic` may now be used with vector types on x86.
+* Added `@llvm.reloc.none` intrinsic to emit null relocations to symbols. This
+ emits an undefined symbol reference without adding any dedicated code or data to
+ to bear the relocation.
Changes to LLVM infrastructure
------------------------------
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index 98b52579d03b..2f1364d19971 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -71,7 +71,7 @@ void ComputeValueTypes(const DataLayout &DL, Type *Ty,
///
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<EVT> *MemVTs = nullptr,
SmallVectorImpl<TypeSize> *Offsets = nullptr,
TypeSize StartingOffset = TypeSize::getZero());
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
@@ -80,20 +80,6 @@ void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
SmallVectorImpl<uint64_t> *FixedOffsets,
uint64_t StartingOffset);
-/// Variant of ComputeValueVTs that don't produce memory VTs.
-inline void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<TypeSize> *Offsets = nullptr,
- TypeSize StartingOffset = TypeSize::getZero()) {
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offsets, StartingOffset);
-}
-inline void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *FixedOffsets,
- uint64_t StartingOffset) {
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, FixedOffsets, StartingOffset);
-}
-
/// computeValueLLTs - Given an LLVM IR type, compute a sequence of
/// LLTs that represent all the individual underlying
/// non-aggregate types that comprise it.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ff3dd0d4c3c5..b3a2ced70e62 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1516,6 +1516,7 @@ enum NodeType {
PARTIAL_REDUCE_SMLA, // sext, sext
PARTIAL_REDUCE_UMLA, // zext, zext
PARTIAL_REDUCE_SUMLA, // sext, zext
+ PARTIAL_REDUCE_FMLA, // fpext, fpext
// The `llvm.experimental.stackmap` intrinsic.
// Operands: input chain, glue, <id>, <numShadowBytes>, [live0[, live1...]]
@@ -1537,6 +1538,9 @@ enum NodeType {
#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
#include "llvm/IR/VPIntrinsics.def"
+ // Issue a no-op relocation against a given symbol at the current location.
+ RELOC_NONE,
+
// The `llvm.experimental.convergence.*` intrinsics.
CONVERGENCECTRL_ANCHOR,
CONVERGENCECTRL_ENTRY,
diff --git a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h
index e8eceeed6aca..e88079e796e7 100644
--- a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h
@@ -6,15 +6,18 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_LIBCALLLOWERINGINFO_H
+#define LLVM_CODEGEN_LIBCALLLOWERINGINFO_H
+
#include "llvm/IR/RuntimeLibcalls.h"
namespace llvm {
class LibcallLoweringInfo {
private:
- LLVM_ABI const RTLIB::RuntimeLibcallsInfo &RTLCI;
+ const RTLIB::RuntimeLibcallsInfo &RTLCI;
/// Stores the implementation choice for each each libcall.
- LLVM_ABI RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = {
+ RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = {
RTLIB::Unsupported};
public:
@@ -64,3 +67,5 @@ public:
};
} // end namespace llvm
+
+#endif // LLVM_CODEGEN_LIBCALLLOWERINGINFO_H
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index d7921c3eb3f7..27acc83369f0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -474,6 +474,7 @@ private:
void Select_WRITE_REGISTER(SDNode *Op);
void Select_UNDEF(SDNode *N);
void Select_FAKE_USE(SDNode *N);
+ void Select_RELOC_NONE(SDNode *N);
void CannotYetSelect(SDNode *N);
void Select_FREEZE(SDNode *N);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 1759463ea796..cd466dceb900 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1949,6 +1949,10 @@ LLVM_ABI bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
/// be zero.
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
+/// Return true if the value is a constant floating-point value, or a splatted
+/// vector of a constant floating-point value, of 1.0 (with no undefs).
+LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs = false);
+
/// Return true if the value is a constant -1 integer or a splatted vector of a
/// constant -1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 175f20532836..2dcedfb40f3e 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1761,6 +1761,17 @@ public:
return true;
}
+ /// Return true if it's safe to move a machine instruction.
+ /// This allows the backend to prevent certain special instruction
+ /// sequences from being broken by instruction motion in optimization
+ /// passes.
+ /// By default, this returns true for every instruction.
+ virtual bool isSafeToMove(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ return true;
+ }
+
/// Test if the given instruction should be considered a scheduling boundary.
/// This primarily includes labels and terminators.
virtual bool isSchedulingBoundary(const MachineInstr &MI,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2550c2bee5f7..98565f423df3 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1679,7 +1679,7 @@ public:
LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT,
EVT InputVT) const {
assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA ||
- Opc == ISD::PARTIAL_REDUCE_SUMLA);
+ Opc == ISD::PARTIAL_REDUCE_SUMLA || Opc == ISD::PARTIAL_REDUCE_FMLA);
PartialReduceActionTypes Key = {Opc, AccVT.getSimpleVT().SimpleTy,
InputVT.getSimpleVT().SimpleTy};
auto It = PartialReduceMLAActions.find(Key);
@@ -2793,7 +2793,7 @@ protected:
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT,
LegalizeAction Action) {
assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA ||
- Opc == ISD::PARTIAL_REDUCE_SUMLA);
+ Opc == ISD::PARTIAL_REDUCE_SUMLA || Opc == ISD::PARTIAL_REDUCE_FMLA);
assert(AccVT.isValid() && InputVT.isValid() &&
"setPartialReduceMLAAction types aren't valid");
PartialReduceActionTypes Key = {Opc, AccVT.SimpleTy, InputVT.SimpleTy};
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 62d427c3966b..67de123fdbad 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -1366,7 +1366,7 @@ public:
template <typename Fn> void match(Fn F) const { F(Name, Params, Requires); }
void printLeft(OutputBuffer &OB) const override {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "template<";
Params.printWithComma(OB);
OB += "> typename ";
@@ -1550,7 +1550,7 @@ public:
NodeArray getParams() { return Params; }
void printLeft(OutputBuffer &OB) const override {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
Params.printWithComma(OB);
OB += ">";
@@ -1824,7 +1824,7 @@ public:
void printDeclarator(OutputBuffer &OB) const {
if (!TemplateParams.empty()) {
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
TemplateParams.printWithComma(OB);
OB += ">";
@@ -1885,7 +1885,9 @@ public:
}
void printLeft(OutputBuffer &OB) const override {
- bool ParenAll = OB.isGtInsideTemplateArgs() &&
+ // If we're printing a '<' inside of a template argument, and we haven't
+ // yet parenthesized the expression, do so now.
+ bool ParenAll = !OB.isInParensInTemplateArgs() &&
(InfixOperator == ">" || InfixOperator == ">>");
if (ParenAll)
OB.printOpen();
@@ -2061,7 +2063,7 @@ public:
void printLeft(OutputBuffer &OB) const override {
OB += CastKind;
{
- ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
OB += "<";
OB.printLeft(*To);
OB += ">";
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 6e6203d716e7..afdc1a397ca6 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -104,18 +104,32 @@ public:
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
- /// When zero, we're printing template args and '>' needs to be parenthesized.
- /// Use a counter so we can simply increment inside parentheses.
- unsigned GtIsGt = 1;
+ struct {
+ /// The depth of '(' and ')' inside the currently printed template
+ /// arguments.
+ unsigned ParenDepth = 0;
- bool isGtInsideTemplateArgs() const { return GtIsGt == 0; }
+ /// True if we're currently printing a template argument.
+ bool InsideTemplate = false;
+ } TemplateTracker;
+
+ /// Returns true if we're currently between a '(' and ')' when printing
+ /// template args.
+ bool isInParensInTemplateArgs() const {
+ return TemplateTracker.ParenDepth > 0;
+ }
+
+ /// Returns true if we're printing template args.
+ bool isInsideTemplateArgs() const { return TemplateTracker.InsideTemplate; }
void printOpen(char Open = '(') {
- GtIsGt++;
+ if (isInsideTemplateArgs())
+ TemplateTracker.ParenDepth++;
*this += Open;
}
void printClose(char Close = ')') {
- GtIsGt--;
+ if (isInsideTemplateArgs())
+ TemplateTracker.ParenDepth--;
*this += Close;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index d7f0e3a3d49d..a86dca06f8ec 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -446,7 +446,12 @@ struct CollapseT {
N v;
};
-// V5.2: [15.8.3] `extended-atomic` clauses
+// [6.0:266]
+template <typename T, typename I, typename E> //
+struct CollectorT {
+ using IncompleteTrait = std::true_type;
+};
+
template <typename T, typename I, typename E> //
struct CompareT {
using EmptyTrait = std::true_type;
@@ -736,6 +741,12 @@ struct IndirectT {
OPT(InvokedByFptr) v;
};
+// [6.0:265-266]
+template <typename T, typename I, typename E> //
+struct InductorT {
+ using IncompleteTrait = std::true_type;
+};
+
// V5.2: [14.1.2] `init` clause
template <typename T, typename I, typename E> //
struct InitT {
@@ -1324,8 +1335,9 @@ using EmptyClausesT = std::variant<
template <typename T, typename I, typename E>
using IncompleteClausesT =
- std::variant<AdjustArgsT<T, I, E>, AppendArgsT<T, I, E>, GraphIdT<T, I, E>,
- GraphResetT<T, I, E>, MatchT<T, I, E>, OtherwiseT<T, I, E>,
+ std::variant<AdjustArgsT<T, I, E>, AppendArgsT<T, I, E>,
+ CollectorT<T, I, E>, GraphIdT<T, I, E>, GraphResetT<T, I, E>,
+ InductorT<T, I, E>, MatchT<T, I, E>, OtherwiseT<T, I, E>,
ReplayableT<T, I, E>, TransparentT<T, I, E>, WhenT<T, I, E>>;
template <typename T, typename I, typename E>
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 208609f64f41..bebab9ba6348 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -123,6 +123,8 @@ def OMPC_Collapse : Clause<[Spelling<"collapse">]> {
let clangClass = "OMPCollapseClause";
let flangClass = "ScalarIntConstantExpr";
}
+def OMPC_Collector : Clause<[Spelling<"collector">]> {
+}
def OMPC_Compare : Clause<[Spelling<"compare">]> {
let clangClass = "OMPCompareClause";
}
@@ -264,6 +266,8 @@ def OMPC_Inclusive : Clause<[Spelling<"inclusive">]> {
def OMPC_Indirect : Clause<[Spelling<"indirect">]> {
let flangClass = "OmpIndirectClause";
}
+def OMPC_Inductor : Clause<[Spelling<"inductor">]> {
+}
def OMPC_Init : Clause<[Spelling<"init">]> {
let clangClass = "OMPInitClause";
let flangClass = "OmpInitClause";
@@ -749,6 +753,14 @@ def OMP_Critical : Directive<[Spelling<"critical">]> {
let association = AS_Block;
let category = CA_Executable;
}
+def OMP_DeclareInduction : Directive<[Spelling<"declare_induction">]> {
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Collector, 60>,
+ VersionedClause<OMPC_Inductor, 60>,
+ ];
+ let association = AS_None;
+ let category = CA_Declarative;
+}
def OMP_DeclareMapper : Directive<[Spelling<"declare mapper", 1, 52>,
Spelling<"declare_mapper", 60>]> {
let requiredClauses = [
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 6a079f62dd9c..07aa2faffa7c 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1913,6 +1913,9 @@ def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatch
def int_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[], [IntrNoMem]>;
+def int_reloc_none : DefaultAttrsIntrinsic<[], [llvm_metadata_ty],
+ [IntrNoMem, IntrHasSideEffects]>;
+
//===---------------- Vector Predication Intrinsics --------------===//
// Memory Intrinsics
def int_vp_store : DefaultAttrsIntrinsic<[],
@@ -2810,6 +2813,10 @@ def int_vector_partial_reduce_add : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
[IntrNoMem,
IntrSpeculatable]>;
+def int_vector_partial_reduce_fadd : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+ [llvm_anyfloat_ty, llvm_anyfloat_ty],
+ [IntrNoMem]>;
+
//===----------------- Pointer Authentication Intrinsics ------------------===//
//
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b81edc385cd4..4cab6e05ba79 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -499,6 +499,7 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic;
def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic;
def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;
+ def int_aarch64_neon_fmmla : AdvSIMD_MatMul_Intrinsic;
def int_aarch64_neon_usdot : AdvSIMD_Dot_Intrinsic;
def int_aarch64_neon_bfdot : AdvSIMD_Dot_Intrinsic;
def int_aarch64_neon_bfmmla
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index e3ec7e1764da..88aef4a368f2 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -872,6 +872,9 @@ inline bind_and_match_ty<const Value, MatchTy> m_Value(const Value *&V,
/// Match an instruction, capturing it if we match.
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+inline bind_ty<const Instruction> m_Instruction(const Instruction *&I) {
+ return I;
+}
/// Match against the nested pattern, and capture the instruction if we match.
template <typename MatchTy>
@@ -879,11 +882,22 @@ inline bind_and_match_ty<Instruction, MatchTy>
m_Instruction(Instruction *&I, const MatchTy &Match) {
return {I, Match};
}
+template <typename MatchTy>
+inline bind_and_match_ty<const Instruction, MatchTy>
+m_Instruction(const Instruction *&I, const MatchTy &Match) {
+ return {I, Match};
+}
/// Match a unary operator, capturing it if we match.
inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
+inline bind_ty<const UnaryOperator> m_UnOp(const UnaryOperator *&I) {
+ return I;
+}
/// Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
+inline bind_ty<const BinaryOperator> m_BinOp(const BinaryOperator *&I) {
+ return I;
+}
/// Match a with overflow intrinsic, capturing it if we match.
inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) {
return I;
@@ -3069,12 +3083,26 @@ m_c_MaxOrMin(const LHS &L, const RHS &R) {
m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
}
+template <Intrinsic::ID IntrID, typename LHS, typename RHS>
+struct CommutativeBinaryIntrinsic_match {
+ LHS L;
+ RHS R;
+
+ CommutativeBinaryIntrinsic_match(const LHS &L, const RHS &R) : L(L), R(R) {}
+
+ template <typename OpTy> bool match(OpTy *V) const {
+ const auto *II = dyn_cast<IntrinsicInst>(V);
+ if (!II || II->getIntrinsicID() != IntrID)
+ return false;
+ return (L.match(II->getArgOperand(0)) && R.match(II->getArgOperand(1))) ||
+ (L.match(II->getArgOperand(1)) && R.match(II->getArgOperand(0)));
+ }
+};
+
template <Intrinsic::ID IntrID, typename T0, typename T1>
-inline match_combine_or<typename m_Intrinsic_Ty<T0, T1>::Ty,
- typename m_Intrinsic_Ty<T1, T0>::Ty>
+inline CommutativeBinaryIntrinsic_match<IntrID, T0, T1>
m_c_Intrinsic(const T0 &Op0, const T1 &Op1) {
- return m_CombineOr(m_Intrinsic<IntrID>(Op0, Op1),
- m_Intrinsic<IntrID>(Op1, Op0));
+ return CommutativeBinaryIntrinsic_match<IntrID, T0, T1>(Op0, Op1);
}
/// Matches FAdd with LHS and RHS in either order.
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 85a9efe73855..7886478158c3 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1058,8 +1058,10 @@ struct NamedInstrProfRecord : InstrProfRecord {
StringRef Name;
uint64_t Hash;
- // We reserve this bit as the flag for context sensitive profile record.
- static const int CS_FLAG_IN_FUNC_HASH = 60;
+ // We reserve the highest 4 bits as flags.
+ static constexpr uint64_t FUNC_HASH_MASK = 0x0FFF'FFFF'FFFF'FFFF;
+ // The 60th bit is for context sensitive profile record.
+ static constexpr unsigned CS_FLAG_IN_FUNC_HASH = 60;
NamedInstrProfRecord() = default;
NamedInstrProfRecord(StringRef Name, uint64_t Hash,
@@ -1174,7 +1176,9 @@ enum ProfVersion {
Version11 = 11,
// VTable profiling, decision record and bitmap are modified for mcdc.
Version12 = 12,
- // The current version is 12.
+ // In this version, the frontend PGO stable hash algorithm defaults to V4.
+ Version13 = 13,
+ // The current version is 13.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 0496f240dc82..46d6bb5bd889 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -722,7 +722,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 10
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 6
diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h
index a6435a2562a2..af283e2c8ada 100644
--- a/llvm/include/llvm/Support/Casting.h
+++ b/llvm/include/llvm/Support/Casting.h
@@ -878,18 +878,18 @@ inline constexpr detail::IsaAndPresentCheckPredicate<Types...>
IsaAndPresentPred{};
/// Function objects corresponding to the Cast types defined above.
-template <typename From>
-inline constexpr detail::StaticCastFunc<From> StaticCastTo{};
+template <typename To>
+inline constexpr detail::StaticCastFunc<To> StaticCastTo{};
-template <typename From> inline constexpr detail::CastFunc<From> CastTo{};
+template <typename To> inline constexpr detail::CastFunc<To> CastTo{};
-template <typename From>
-inline constexpr detail::CastIfPresentFunc<From> CastIfPresentTo{};
+template <typename To>
+inline constexpr detail::CastIfPresentFunc<To> CastIfPresentTo{};
-template <typename From>
-inline constexpr detail::DynCastIfPresentFunc<From> DynCastIfPresentTo{};
+template <typename To>
+inline constexpr detail::DynCastIfPresentFunc<To> DynCastIfPresentTo{};
-template <typename From> inline constexpr detail::DynCastFunc<From> DynCastTo{};
+template <typename To> inline constexpr detail::DynCastFunc<To> DynCastTo{};
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index e55314568d68..fb20da336dda 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -233,6 +233,9 @@ HANDLE_TARGET_OPCODE(MEMBARRIER)
// using.
HANDLE_TARGET_OPCODE(JUMP_TABLE_DEBUG_INFO)
+// Issue a no-op relocation against a given symbol at the current location.
+HANDLE_TARGET_OPCODE(RELOC_NONE)
+
HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ENTRY)
HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ANCHOR)
HANDLE_TARGET_OPCODE(CONVERGENCECTRL_LOOP)
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index c20efc7396b7..d3276a18dc2c 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -14,6 +14,7 @@
#define LLVM_SUPPORT_THREADPOOL_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Jobserver.h"
@@ -51,7 +52,7 @@ class ThreadPoolTaskGroup;
class LLVM_ABI ThreadPoolInterface {
/// The actual method to enqueue a task to be defined by the concrete
/// implementation.
- virtual void asyncEnqueue(std::function<void()> Task,
+ virtual void asyncEnqueue(llvm::unique_function<void()> Task,
ThreadPoolTaskGroup *Group) = 0;
public:
@@ -95,22 +96,22 @@ public:
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename Func>
auto async(Func &&F) -> std::shared_future<decltype(F())> {
- return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)),
- nullptr);
+ return asyncImpl(
+ llvm::unique_function<decltype(F())()>(std::forward<Func>(F)), nullptr);
}
template <typename Func>
auto async(ThreadPoolTaskGroup &Group, Func &&F)
-> std::shared_future<decltype(F())> {
- return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)),
- &Group);
+ return asyncImpl(
+ llvm::unique_function<decltype(F())()>(std::forward<Func>(F)), &Group);
}
private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename ResTy>
- std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task,
+ std::shared_future<ResTy> asyncImpl(llvm::unique_function<ResTy()> Task,
ThreadPoolTaskGroup *Group) {
auto Future = std::async(std::launch::deferred, std::move(Task)).share();
asyncEnqueue([Future]() { Future.wait(); }, Group);
@@ -160,7 +161,7 @@ private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- void asyncEnqueue(std::function<void()> Task,
+ void asyncEnqueue(llvm::unique_function<void()> Task,
ThreadPoolTaskGroup *Group) override {
int requestedThreads;
{
@@ -189,7 +190,8 @@ private:
mutable llvm::sys::RWMutex ThreadsLock;
/// Tasks waiting for execution in the pool.
- std::deque<std::pair<std::function<void()>, ThreadPoolTaskGroup *>> Tasks;
+ std::deque<std::pair<llvm::unique_function<void()>, ThreadPoolTaskGroup *>>
+ Tasks;
/// Locking and signaling for accessing the Tasks queue.
std::mutex QueueLock;
@@ -239,13 +241,14 @@ public:
private:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- void asyncEnqueue(std::function<void()> Task,
+ void asyncEnqueue(llvm::unique_function<void()> Task,
ThreadPoolTaskGroup *Group) override {
Tasks.emplace_back(std::make_pair(std::move(Task), Group));
}
/// Tasks waiting for execution in the pool.
- std::deque<std::pair<std::function<void()>, ThreadPoolTaskGroup *>> Tasks;
+ std::deque<std::pair<llvm::unique_function<void()>, ThreadPoolTaskGroup *>>
+ Tasks;
};
#if LLVM_ENABLE_THREADS
diff --git a/llvm/include/llvm/Support/thread.h b/llvm/include/llvm/Support/thread.h
index ecde62d8368e..51873e7d529b 100644
--- a/llvm/include/llvm/Support/thread.h
+++ b/llvm/include/llvm/Support/thread.h
@@ -34,7 +34,7 @@ typedef PVOID HANDLE;
namespace llvm {
-#if LLVM_ON_UNIX || _WIN32
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
/// LLVM thread following std::thread interface with added constructor to
/// specify stack size.
@@ -49,7 +49,7 @@ class thread {
}
public:
-#if LLVM_ON_UNIX
+#ifdef LLVM_ON_UNIX
using native_handle_type = pthread_t;
using id = pthread_t;
using start_routine_type = void *(*)(void *);
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 13175177edd3..db99885121ec 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1554,6 +1554,11 @@ def JUMP_TABLE_DEBUG_INFO : StandardPseudoInstruction {
let Size = 0;
let isMeta = true;
}
+def RELOC_NONE : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$symbol);
+ let hasSideEffects = true;
+}
let hasSideEffects = false, isMeta = true, isConvergent = true in {
def CONVERGENCECTRL_ANCHOR : StandardPseudoInstruction {
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 07a858fd682f..a9750a5ab03f 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -527,6 +527,8 @@ def partial_reduce_smla : SDNode<"ISD::PARTIAL_REDUCE_SMLA",
SDTPartialReduceMLA>;
def partial_reduce_sumla : SDNode<"ISD::PARTIAL_REDUCE_SUMLA",
SDTPartialReduceMLA>;
+def partial_reduce_fmla : SDNode<"ISD::PARTIAL_REDUCE_FMLA",
+ SDTPartialReduceMLA>;
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 11d829492a10..b3b62cfe8b45 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -407,9 +407,10 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
continue;
Value *Ptr = getLoadStorePointerOperand(&Inst);
const Loop *L = LI.getLoopFor(Inst.getParent());
+ const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
const SCEV *AccessFn = SE.removePointerBase(PtrSCEV);
- SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, L);
+ SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, OutermostLoop);
OS.indent(2) << "Inst: " << Inst << "\n";
OS.indent(4) << "Expr: " << *AccessFn << "\n";
Mon.print(OS, 4);
@@ -945,6 +946,8 @@ SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
SCEVMonotonicity
SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
const Loop *OutermostLoop) {
+ assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
+ "OutermostLoop must be outermost");
assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
this->OutermostLoop = OutermostLoop;
return visit(Expr);
@@ -1587,6 +1590,15 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
return nullptr;
}
+/// Returns \p A * \p B if it guaranteed not to signed wrap. Otherwise returns
+/// nullptr. \p A and \p B must have the same integer type.
+static const SCEV *mulSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
+ ScalarEvolution &SE) {
+ if (SE.willNotOverflow(Instruction::Mul, /*Signed=*/true, A, B))
+ return SE.getMulExpr(A, B);
+ return nullptr;
+}
+
/// Returns the absolute value of \p A. In the context of dependence analysis,
/// we need an absolute value in a mathematical sense. If \p A is the signed
/// minimum value, we cannot represent it unless extending the original type.
@@ -1686,7 +1698,11 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
assert(0 < Level && Level <= CommonLevels && "level out of range");
Level--;
- const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ const SCEV *Delta = minusSCEVNoSignedOverflow(SrcConst, DstConst, *SE);
+ if (!Delta) {
+ Result.Consistent = false;
+ return false;
+ }
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
@@ -1702,7 +1718,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE);
if (!AbsDelta || !AbsCoeff)
return false;
- const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ const SCEV *Product = mulSCEVNoSignedOverflow(UpperBound, AbsCoeff, *SE);
+ if (!Product)
+ return false;
return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product);
}();
if (IsDeltaLarge) {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 789a98366cea..41ff816a3326 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -350,6 +350,139 @@ unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
return V->getType()->getScalarSizeInBits() - SignBits + 1;
}
+/// Try to detect the lerp pattern: a * (b - c) + c * d
+/// where a >= 0, b >= 0, c >= 0, d >= 0, and b >= c.
+///
+/// In that particular case, we can use the following chain of reasoning:
+///
+/// a * (b - c) + c * d <= a' * (b - c) + a' * c = a' * b where a' = max(a, d)
+///
+/// Since that is true for arbitrary a, b, c and d within our constraints, we
+/// can conclude that:
+///
+/// max(a * (b - c) + c * d) <= max(max(a), max(d)) * max(b) = U
+///
+/// Considering that any result of the lerp would be less or equal to U, it
+/// would have at least the number of leading 0s as in U.
+///
+/// While being quite a specific situation, it is fairly common in computer
+/// graphics in the shape of alpha blending.
+///
+/// Modifies given KnownOut in-place with the inferred information.
+static void computeKnownBitsFromLerpPattern(const Value *Op0, const Value *Op1,
+ const APInt &DemandedElts,
+ KnownBits &KnownOut,
+ const SimplifyQuery &Q,
+ unsigned Depth) {
+
+ Type *Ty = Op0->getType();
+ const unsigned BitWidth = Ty->getScalarSizeInBits();
+
+ // Only handle scalar types for now
+ if (Ty->isVectorTy())
+ return;
+
+ // Try to match: a * (b - c) + c * d.
+ // When a == 1 => A == nullptr, the same applies to d/D as well.
+ const Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ const Instruction *SubBC = nullptr;
+
+ const auto MatchSubBC = [&]() {
+ // (b - c) can have two forms that interest us:
+ //
+ // 1. sub nuw %b, %c
+ // 2. xor %c, %b
+ //
+ // For the first case, nuw flag guarantees our requirement b >= c.
+ //
+ // The second case might happen when the analysis can infer that b is a mask
+ // for c and we can transform sub operation into xor (that is usually true
+ // for constant b's). Even though xor is symmetrical, canonicalization
+ // ensures that the constant will be the RHS. We have additional checks
+ // later on to ensure that this xor operation is equivalent to subtraction.
+ return m_Instruction(SubBC, m_CombineOr(m_NUWSub(m_Value(B), m_Value(C)),
+ m_Xor(m_Value(C), m_Value(B))));
+ };
+
+ const auto MatchASubBC = [&]() {
+ // Cases:
+ // - a * (b - c)
+ // - (b - c) * a
+ // - (b - c) <- a implicitly equals 1
+ return m_CombineOr(m_c_Mul(m_Value(A), MatchSubBC()), MatchSubBC());
+ };
+
+ const auto MatchCD = [&]() {
+ // Cases:
+ // - d * c
+ // - c * d
+ // - c <- d implicitly equals 1
+ return m_CombineOr(m_c_Mul(m_Value(D), m_Specific(C)), m_Specific(C));
+ };
+
+ const auto Match = [&](const Value *LHS, const Value *RHS) {
+ // We do use m_Specific(C) in MatchCD, so we have to make sure that
+ // it's bound to anything and match(LHS, MatchASubBC()) absolutely
+ // has to evaluate first and return true.
+ //
+ // If Match returns true, it is guaranteed that B != nullptr, C != nullptr.
+ return match(LHS, MatchASubBC()) && match(RHS, MatchCD());
+ };
+
+ if (!Match(Op0, Op1) && !Match(Op1, Op0))
+ return;
+
+ const auto ComputeKnownBitsOrOne = [&](const Value *V) {
+ // For some of the values we use the convention of leaving
+ // it nullptr to signify an implicit constant 1.
+ return V ? computeKnownBits(V, DemandedElts, Q, Depth + 1)
+ : KnownBits::makeConstant(APInt(BitWidth, 1));
+ };
+
+ // Check that all operands are non-negative
+ const KnownBits KnownA = ComputeKnownBitsOrOne(A);
+ if (!KnownA.isNonNegative())
+ return;
+
+ const KnownBits KnownD = ComputeKnownBitsOrOne(D);
+ if (!KnownD.isNonNegative())
+ return;
+
+ const KnownBits KnownB = computeKnownBits(B, DemandedElts, Q, Depth + 1);
+ if (!KnownB.isNonNegative())
+ return;
+
+ const KnownBits KnownC = computeKnownBits(C, DemandedElts, Q, Depth + 1);
+ if (!KnownC.isNonNegative())
+ return;
+
+ // If we matched subtraction as xor, we need to actually check that xor
+ // is semantically equivalent to subtraction.
+ //
+ // For that to be true, b has to be a mask for c or that b's known
+ // ones cover all known and possible ones of c.
+ if (SubBC->getOpcode() == Instruction::Xor &&
+ !KnownC.getMaxValue().isSubsetOf(KnownB.getMinValue()))
+ return;
+
+ const APInt MaxA = KnownA.getMaxValue();
+ const APInt MaxD = KnownD.getMaxValue();
+ const APInt MaxAD = APIntOps::umax(MaxA, MaxD);
+ const APInt MaxB = KnownB.getMaxValue();
+
+ // We can't infer leading zeros info if the upper-bound estimate wraps.
+ bool Overflow;
+ const APInt UpperBound = MaxAD.umul_ov(MaxB, Overflow);
+
+ if (Overflow)
+ return;
+
+ // If we know that x <= y and both are positive than x has at least the same
+ // number of leading zeros as y.
+ const unsigned MinimumNumberOfLeadingZeros = UpperBound.countl_zero();
+ KnownOut.Zero.setHighBits(MinimumNumberOfLeadingZeros);
+}
+
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW, bool NUW,
const APInt &DemandedElts,
@@ -369,6 +502,10 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
isImpliedByDomCondition(ICmpInst::ICMP_SLE, Op1, Op0, Q.CxtI, Q.DL)
.value_or(false))
KnownOut.makeNonNegative();
+
+ if (Add)
+ // Try to match lerp pattern and combine results
+ computeKnownBitsFromLerpPattern(Op0, Op1, DemandedElts, KnownOut, Q, Depth);
}
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
index ae9d818241f4..7b790bb005ce 100644
--- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -174,7 +174,7 @@ getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
return createFileError(Path, EC);
llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
- return LHS.Order <= RHS.Order;
+ return LHS.Order < RHS.Order;
});
SmallVector<std::string, 4> DBDirs;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 713277d0bc5e..3aa245b7f3f1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2087,6 +2087,17 @@ void AsmPrinter::emitFunctionBody() {
// This is only used to influence register allocation behavior, no
// actual initialization is needed.
break;
+ case TargetOpcode::RELOC_NONE: {
+ // Generate a temporary label for the current PC.
+ MCSymbol *Sym = OutContext.createTempSymbol("reloc_none");
+ OutStreamer->emitLabel(Sym);
+ const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext);
+ const MCExpr *Value = MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(MI.getOperand(0).getSymbolName()),
+ OutContext);
+ OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc());
+ break;
+ }
default:
emitInstruction(&MI);
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 7292bc2be0df..af1625a20956 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1979,6 +1979,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator FIB = FBB->begin();
MachineBasicBlock::iterator TIE = TBB->end();
MachineBasicBlock::iterator FIE = FBB->end();
+ MachineFunction &MF = *MBB->getParent();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
TIB = skipDebugInstructionsForward(TIB, TIE, false);
@@ -1993,6 +1994,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
// Hard to reason about register liveness with predicated instruction.
break;
+ if (!TII->isSafeToMove(*TIB, MBB, MF))
+ // Don't hoist the instruction if it isn't safe to move.
+ break;
+
bool IsSafe = true;
for (MachineOperand &MO : TIB->operands()) {
// Don't attempt to hoist instructions with register masks.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b3c312569736..7be746830056 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -292,7 +292,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
LLVMContext &Ctx = OrigArg.Ty->getContext();
SmallVector<EVT, 4> SplitVTs;
- ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);
+ ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, /*MemVTs=*/nullptr, Offsets,
+ 0);
if (SplitVTs.size() == 0)
return;
@@ -996,7 +997,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(VRegs.size() == SplitVTs.size());
@@ -1028,7 +1029,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(VRegs.size() == SplitVTs.size());
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index be1b51f54681..4f6a19fe6633 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2686,6 +2686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::experimental_convergence_entry:
case Intrinsic::experimental_convergence_loop:
return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder);
+ case Intrinsic::reloc_none: {
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(0))->getMetadata();
+ StringRef SymbolName = cast<MDString>(MD)->getString();
+ MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE)
+ .addExternalSymbol(SymbolName.data());
+ return true;
+ }
}
return false;
}
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 8ad9245a4768..37e5c517d24d 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1547,10 +1547,14 @@ bool MachineInstr::mayAlias(BatchAAResults *AA, const MachineInstr &Other,
// Check each pair of memory operands from both instructions, which can't
// alias only if all pairs won't alias.
- for (auto *MMOa : memoperands())
- for (auto *MMOb : Other.memoperands())
+ for (auto *MMOa : memoperands()) {
+ for (auto *MMOb : Other.memoperands()) {
+ if (!MMOa->isStore() && !MMOb->isStore())
+ continue;
if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb))
return true;
+ }
+ }
return false;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 816b7ba92bd8..f144f17d5a8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2042,6 +2042,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
return visitPARTIAL_REDUCE_MLA(N);
case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
@@ -13006,6 +13007,9 @@ SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
//
// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
// -> partial_reduce_*mla(acc, x, C)
+//
+// partial_reduce_fmla(acc, fmul(fpext(a), fpext(b)), splat(1.0))
+// -> partial_reduce_fmla(acc, a, b)
SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
SDLoc DL(N);
auto *Context = DAG.getContext();
@@ -13014,7 +13018,7 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
SDValue Op2 = N->getOperand(2);
unsigned Opc = Op1->getOpcode();
- if (Opc != ISD::MUL && Opc != ISD::SHL)
+ if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
return SDValue();
SDValue LHS = Op1->getOperand(0);
@@ -13033,13 +13037,16 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
Opc = ISD::MUL;
}
- APInt C;
- if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) ||
- !C.isOne())
+ if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
+ !(Opc == ISD::FMUL && llvm::isOneOrOneSplatFP(Op2)))
return SDValue();
+ auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
+ return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
+ };
+
unsigned LHSOpcode = LHS->getOpcode();
- if (!ISD::isExtOpcode(LHSOpcode))
+ if (!IsIntOrFPExtOpcode(LHSOpcode))
return SDValue();
SDValue LHSExtOp = LHS->getOperand(0);
@@ -13047,6 +13054,7 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
// -> partial_reduce_*mla(acc, x, C)
+ APInt C;
if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
// TODO: Make use of partial_reduce_sumla here
APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
@@ -13071,7 +13079,7 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
}
unsigned RHSOpcode = RHS->getOpcode();
- if (!ISD::isExtOpcode(RHSOpcode))
+ if (!IsIntOrFPExtOpcode(RHSOpcode))
return SDValue();
SDValue RHSExtOp = RHS->getOperand(0);
@@ -13088,6 +13096,8 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
std::swap(LHSExtOp, RHSExtOp);
+ } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
+ NewOpc = ISD::PARTIAL_REDUCE_FMLA;
} else
return SDValue();
// For a 2-stage extend the signedness of both of the extends must match
@@ -13115,30 +13125,33 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
// -> partial.reduce.smla(acc, op, splat(trunc(1)))
// partial.reduce.sumla(acc, sext(op), splat(1))
// -> partial.reduce.smla(acc, op, splat(trunc(1)))
+// partial.reduce.fmla(acc, fpext(op), splat(1.0))
+// -> partial.reduce.fmla(acc, op, splat(1.0))
SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
SDLoc DL(N);
SDValue Acc = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
- APInt ConstantOne;
- if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
- !ConstantOne.isOne())
+ if (!llvm::isOneOrOneSplat(Op2) && !llvm::isOneOrOneSplatFP(Op2))
return SDValue();
unsigned Op1Opcode = Op1.getOpcode();
- if (!ISD::isExtOpcode(Op1Opcode))
+ if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
return SDValue();
- bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
+ bool Op1IsSigned =
+ Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
EVT AccElemVT = Acc.getValueType().getVectorElementType();
if (Op1IsSigned != NodeIsSigned &&
Op1.getValueType().getVectorElementType() != AccElemVT)
return SDValue();
- unsigned NewOpcode =
- Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
+ unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
+ ? ISD::PARTIAL_REDUCE_FMLA
+ : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
+ : ISD::PARTIAL_REDUCE_UMLA;
SDValue UnextOp1 = Op1.getOperand(0);
EVT UnextOp1VT = UnextOp1.getValueType();
@@ -13148,8 +13161,12 @@ SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
return SDValue();
+ SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
+ ? DAG.getConstantFP(1, DL, UnextOp1VT)
+ : DAG.getConstant(1, DL, UnextOp1VT);
+
return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
- DAG.getConstant(1, DL, UnextOp1VT));
+ Constant);
}
SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
@@ -16736,38 +16753,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// fold (conv (load x)) -> (load (conv*)x)
+ // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
// If the resultant load doesn't need a higher alignment than the original!
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not remove the cast if the types differ in endian layout.
- TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
- TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
- // If the load is volatile, we only want to change the load type if the
- // resulting load is legal. Otherwise we might increase the number of
- // memory accesses. We don't care if the original type was legal or not
- // as we assume software couldn't rely on the number of accesses of an
- // illegal type.
- ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
- TLI.isOperationLegal(ISD::LOAD, VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
+ if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
+ return SDValue();
- if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
- *LN0->getMemOperand())) {
- // If the range metadata type does not match the new memory
- // operation type, remove the range metadata.
- if (const MDNode *MD = LN0->getRanges()) {
- ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
- if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
- !VT.isInteger()) {
- LN0->getMemOperand()->clearRanges();
- }
+ // Do not remove the cast if the types differ in endian layout.
+ if (TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) !=
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()))
+ return SDValue();
+
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ auto *LN0 = cast<LoadSDNode>(N0);
+ if ((LegalOperations || !LN0->isSimple()) &&
+ !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand()))
+ return SDValue();
+
+ // If the range metadata type does not match the new memory
+ // operation type, remove the range metadata.
+ if (const MDNode *MD = LN0->getRanges()) {
+ ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
+ LN0->getMemOperand()->clearRanges();
}
- SDValue Load =
- DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
- return Load;
}
- }
+ SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ };
+
+ if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
+ return NewLd;
+
+ if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
+ if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
+ return DAG.getFreeze(NewLd);
if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
return V;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8e423c4f83b3..94751be5b798 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -534,6 +534,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Action =
TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
Node->getOperand(1).getValueType());
@@ -1243,6 +1244,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
return;
case ISD::VECREDUCE_SEQ_FADD:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bb4a8d9967f9..dd5c011bfe78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1474,6 +1474,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi);
break;
case ISD::GET_ACTIVE_LANE_MASK:
@@ -3689,6 +3690,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Res = SplitVecOp_PARTIAL_REDUCE_MLA(N);
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ff6a7b2cf546..bbc1d734cfef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8404,7 +8404,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
- case ISD::PARTIAL_REDUCE_SUMLA: {
+ case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA: {
[[maybe_unused]] EVT AccVT = N1.getValueType();
[[maybe_unused]] EVT Input1VT = N2.getValueType();
[[maybe_unused]] EVT Input2VT = N3.getValueType();
@@ -12746,6 +12747,10 @@ void SelectionDAG::getTopologicallyOrderedNodes(
for (unsigned i = 0U; i < SortedNodes.size(); ++i) {
const SDNode *N = SortedNodes[i];
for (const SDNode *U : N->users()) {
+ // HandleSDNode is never part of a DAG and therefore has no entry in
+ // RemainingOperands.
+ if (U->getOpcode() == ISD::HANDLENODE)
+ continue;
unsigned &NumRemOperands = RemainingOperands[U];
assert(NumRemOperands && "Invalid number of remaining operands");
--NumRemOperands;
@@ -12759,8 +12764,6 @@ void SelectionDAG::getTopologicallyOrderedNodes(
"First node in topological sort is not the entry token");
assert(SortedNodes.front()->getNumOperands() == 0 &&
"First node in topological sort has operands");
- assert(SortedNodes.back()->use_empty() &&
- "Last node in topologic sort has users");
}
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
@@ -13062,6 +13065,11 @@ bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
return C && C->isOne();
}
+bool llvm::isOneOrOneSplatFP(SDValue N, bool AllowUndefs) {
+ ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs);
+ return C && C->isExactlyValue(1.0);
+}
+
bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9961c982bdf3..88b0809b767b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4758,7 +4758,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets, 0);
+ SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4794,7 +4794,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
- ValueVTs, &Offsets, 0);
+ ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -7811,6 +7811,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
+ case Intrinsic::reloc_none: {
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ StringRef SymbolName = cast<MDString>(MD)->getString();
+ SDValue Ops[2] = {
+ getRoot(),
+ DAG.getTargetExternalSymbol(
+ SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))};
+ DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops));
+ return;
+ }
+
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
@@ -8176,6 +8187,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Input, DAG.getConstant(1, sdl, Input.getValueType())));
return;
}
+ case Intrinsic::vector_partial_reduce_fadd: {
+ SDValue Acc = getValue(I.getOperand(0));
+ SDValue Input = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(
+ ISD::PARTIAL_REDUCE_FMLA, sdl, Acc.getValueType(), Acc,
+ Input, DAG.getConstantFP(1.0, sdl, Input.getValueType())));
+ return;
+ }
case Intrinsic::experimental_cttz_elts: {
auto DL = getCurSDLoc();
SDValue Op = getValue(I.getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77377d348b83..ec5edd5f1397 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::FAKE_USE:
return "fake_use";
+ case ISD::RELOC_NONE:
+ return "reloc_none";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
@@ -588,6 +590,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "partial_reduce_smla";
case ISD::PARTIAL_REDUCE_SUMLA:
return "partial_reduce_sumla";
+ case ISD::PARTIAL_REDUCE_FMLA:
+ return "partial_reduce_fmla";
case ISD::LOOP_DEPENDENCE_WAR_MASK:
return "loop_dep_war";
case ISD::LOOP_DEPENDENCE_RAW_MASK:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c11c5b815b6..8bc5d2f3e421 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2550,6 +2550,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
N->getOperand(1), N->getOperand(0));
}
+void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0),
+ N->getOperand(1), N->getOperand(0));
+}
+
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
// If FREEZE instruction is added later, the code below must be changed as
@@ -3325,6 +3330,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FAKE_USE:
Select_FAKE_USE(NodeToMatch);
return;
+ case ISD::RELOC_NONE:
+ Select_RELOC_NONE(NodeToMatch);
+ return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9bdf82210fed..b51d6649af2e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12074,22 +12074,32 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
MulOpVT.getVectorElementCount());
- unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
- ? ISD::ZERO_EXTEND
- : ISD::SIGN_EXTEND;
- unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
- ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
+ unsigned ExtOpcLHS, ExtOpcRHS;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::PARTIAL_REDUCE_UMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
+ break;
+ case ISD::PARTIAL_REDUCE_SMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
+ break;
+ case ISD::PARTIAL_REDUCE_FMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
+ break;
+ }
if (ExtMulOpVT != MulOpVT) {
MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
}
SDValue Input = MulLHS;
- APInt ConstantOne;
- if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
- !ConstantOne.isOne())
+ if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
+ if (!llvm::isOneOrOneSplatFP(MulRHS))
+ Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
+ } else if (!llvm::isOneOrOneSplat(MulRHS)) {
Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
+ }
unsigned Stride = AccVT.getVectorMinNumElements();
unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
@@ -12099,10 +12109,13 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
for (unsigned I = 0; I < ScaleFactor; I++)
Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
+ unsigned FlatNode =
+ N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
+
// Flatten the subvector tree
while (Subvectors.size() > 1) {
Subvectors.push_back(
- DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
+ DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
Subvectors.pop_front();
Subvectors.pop_front();
}
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 414e414738b7..b99e1c7f19b7 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1665,6 +1665,17 @@ void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);
+
+ // Update uses of RegB to uses of RegA inside the bundle.
+ if (MI->isBundle()) {
+ for (MachineOperand &MO : mi_bundle_ops(*MI)) {
+ if (MO.isReg() && MO.getReg() == RegB) {
+ assert(MO.getSubReg() == 0 && SubRegB == 0 &&
+ "tied subregister uses in bundled instructions not supported");
+ MO.setReg(RegA);
+ }
+ }
+ }
}
if (AllUsesCopied) {
diff --git a/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp
index 34174f98b7e3..ca918f6e17b3 100644
--- a/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp
+++ b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp
@@ -377,8 +377,10 @@ Error SyntheticTypeNameBuilder::addTypeName(UnitEntryPairTy InputUnitEntryPair,
} break;
}
- // If name for the DIE is not determined yet add referenced types to the name.
- if (!HasLinkageName && !HasShortName && !HasDeclFileName) {
+ // If name for the DIE is not determined yet or if the DIE is a typedef, add
+ // referenced types to the name.
+ if ((!HasLinkageName && !HasShortName && !HasDeclFileName) ||
+ InputUnitEntryPair.DieEntry->getTag() == dwarf::DW_TAG_typedef) {
if (InputUnitEntryPair.CU->find(InputUnitEntryPair.DieEntry,
getODRAttributes()))
if (Error Err = addReferencedODRDies(InputUnitEntryPair, AddParentNames,
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 1009cc91ca12..8e476cdafdb7 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -25,10 +25,6 @@
using namespace llvm;
using namespace llvm::itanium_demangle;
-constexpr const char *itanium_demangle::FloatData<float>::spec;
-constexpr const char *itanium_demangle::FloatData<double>::spec;
-constexpr const char *itanium_demangle::FloatData<long double>::spec;
-
// <discriminator> := _ <non-negative number> # when number < 10
// := __ <non-negative number> _ # when number >= 10
// extension := decimal-digit+ # at the end of string
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 24f90bf6de7f..59eb87079823 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6013,6 +6013,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
"cache type argument to llvm.prefetch must be 0-1", Call);
break;
+ case Intrinsic::reloc_none: {
+ Check(isa<MDString>(
+ cast<MetadataAsValue>(Call.getArgOperand(0))->getMetadata()),
+ "llvm.reloc.none argument must be a metadata string", &Call);
+ break;
+ }
case Intrinsic::stackprotector:
Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
"llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
@@ -6577,6 +6583,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
break;
}
+ case Intrinsic::vector_partial_reduce_fadd:
case Intrinsic::vector_partial_reduce_add: {
VectorType *AccTy = cast<VectorType>(Call.getArgOperand(0)->getType());
VectorType *VecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp
index 2f4e21257af0..9f9d63f212c8 100644
--- a/llvm/lib/Option/ArgList.cpp
+++ b/llvm/lib/Option/ArgList.cpp
@@ -230,10 +230,8 @@ StringRef ArgList::getSubCommand(
HandleMultipleSubcommands(SubCommands);
return {};
}
- if (!OtherPositionals.empty()) {
+ if (!OtherPositionals.empty())
HandleOtherPositionals(OtherPositionals);
- return {};
- }
if (SubCommands.size() == 1)
return SubCommands.front();
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 02087355ab31..54987872f2d8 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1690,7 +1690,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
IndexedInstrProf::ProfVersion::CurrentVersion)
return make_error<InstrProfError>(instrprof_error::unsupported_version);
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
"Please update the reader as needed when a new field is added "
"or when indexed profile version gets bumped.");
@@ -1723,10 +1723,11 @@ size_t Header::size() const {
// of the header, and byte offset of existing fields shouldn't change when
// indexed profile version gets incremented.
static_assert(
- IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
"Please update the size computation below if a new field has "
"been added to the header; for a version bump without new "
"fields, add a case statement to fall through to the latest version.");
+ case 13ull:
case 12ull:
return 72;
case 11ull:
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index a3473514d463..0f15ca8ff6df 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -542,7 +542,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// The WritePrevVersion handling will either need to be removed or updated
// if the version is advanced beyond 12.
static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
- IndexedInstrProf::ProfVersion::Version12);
+ IndexedInstrProf::ProfVersion::Version13);
if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
Header.Version |= VARIANT_MASK_IR_PROF;
if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 69602688cf3f..4779e673cc05 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -73,7 +73,7 @@ static LLVM_THREAD_LOCAL std::vector<ThreadPoolTaskGroup *>
// WaitingForGroup == nullptr means all tasks regardless of their group.
void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
while (true) {
- std::function<void()> Task;
+ llvm::unique_function<void()> Task;
ThreadPoolTaskGroup *GroupOfTask;
{
std::unique_lock<std::mutex> LockGuard(QueueLock);
@@ -189,7 +189,7 @@ void StdThreadPool::processTasksWithJobserver() {
// While we hold a job slot, process tasks from the internal queue.
while (true) {
- std::function<void()> Task;
+ llvm::unique_function<void()> Task;
ThreadPoolTaskGroup *GroupOfTask = nullptr;
{
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c31a090bba77..e8766bc1b8c6 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -3364,6 +3364,22 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
TS->emitARM64WinCFIPACSignLR();
return;
+ case AArch64::SEH_SaveAnyRegI:
+ assert(MI->getOperand(1).getImm() <= 1008 &&
+ "SaveAnyRegQP SEH opcode offset must fit into 6 bits");
+ TS->emitARM64WinCFISaveAnyRegI(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveAnyRegIP:
+ assert(MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1 &&
+ "Non-consecutive registers not allowed for save_any_reg");
+ assert(MI->getOperand(2).getImm() <= 1008 &&
+ "SaveAnyRegQP SEH opcode offset must fit into 6 bits");
+ TS->emitARM64WinCFISaveAnyRegIP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
case AArch64::SEH_SaveAnyRegQP:
assert(MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1 &&
"Non-consecutive registers not allowed for save_any_reg");
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3ee4d58ca892..70c5c2914928 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1082,14 +1082,24 @@ AArch64FrameLowering::insertSEH(MachineBasicBlock::iterator MBBI,
case AArch64::LDPXi: {
Register Reg0 = MBBI->getOperand(0).getReg();
Register Reg1 = MBBI->getOperand(1).getReg();
+
+ int SEHReg0 = RegInfo->getSEHRegNum(Reg0);
+ int SEHReg1 = RegInfo->getSEHRegNum(Reg1);
+
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
.addImm(Imm * 8)
.setMIFlag(Flag);
- else
+ else if (SEHReg0 >= 19 && SEHReg1 >= 19)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
- .addImm(RegInfo->getSEHRegNum(Reg0))
- .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(SEHReg0)
+ .addImm(SEHReg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegIP))
+ .addImm(SEHReg0)
+ .addImm(SEHReg1)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
@@ -1097,10 +1107,16 @@ AArch64FrameLowering::insertSEH(MachineBasicBlock::iterator MBBI,
case AArch64::STRXui:
case AArch64::LDRXui: {
int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
- MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
- .addImm(Reg)
- .addImm(Imm * 8)
- .setMIFlag(Flag);
+ if (Reg >= 19)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegI))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
break;
}
case AArch64::STRDui:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 40e6400756c7..c8a038fa99b3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1916,6 +1916,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv8i16, Legal);
setPartialReduceMLAAction(MLAOps, MVT::nxv8i16, MVT::nxv16i8, Legal);
}
+
+ // Handle floating-point partial reduction
+ if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
+ setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_FMLA, MVT::nxv4f32,
+ MVT::nxv8f16, Legal);
+ }
}
// Handle non-aliasing elements mask
@@ -2283,6 +2289,11 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
}
+ if (Subtarget->hasSVE2p1() && VT.getVectorElementType() == MVT::f32) {
+ setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_FMLA, VT,
+ MVT::getVectorVT(MVT::f16, NumElts * 2), Custom);
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABDS, VT, Default);
setOperationAction(ISD::ABDU, VT, Default);
@@ -7875,6 +7886,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
return LowerPARTIAL_REDUCE_MLA(Op, DAG);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 58a53af76e1b..bb2f083db19e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -13292,18 +13292,24 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
}
-class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
+class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind, list<dag> pattern>
: BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
- V128, asm, ".16b", []> {
+ V128, asm, ".16b", pattern> {
let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn.16b, $Rm.16b",
"|", kind, "\t$Rd, $Rn, $Rm}");
}
-multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
- def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h">{
+multiclass SIMDThreeSameVectorFP8MatrixMul<string asm, SDPatternOperator OpNode>{
+ def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h",
+ [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]> {
let Predicates = [HasNEON, HasF8F16MM];
}
- def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s">{
+ def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]> {
let Predicates = [HasNEON, HasF8F32MM];
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index ccc8eb8a9706..4b4073365483 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1217,6 +1217,8 @@ bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
case AArch64::SEH_EpilogStart:
case AArch64::SEH_EpilogEnd:
case AArch64::SEH_PACSignLR:
+ case AArch64::SEH_SaveAnyRegI:
+ case AArch64::SEH_SaveAnyRegIP:
case AArch64::SEH_SaveAnyRegQP:
case AArch64::SEH_SaveAnyRegQPX:
case AArch64::SEH_AllocZ:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2871a20e28b6..76f076a60765 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5666,6 +5666,8 @@ let isPseudo = 1 in {
def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_SaveAnyRegI : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveAnyRegIP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_AllocZ : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
@@ -11415,7 +11417,7 @@ let Predicates = [HasF16F32MM] in
defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
let Uses = [FPMR, FPCR] in
- defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla", int_aarch64_neon_fmmla>;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 4df4d54e60c9..965585f40571 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -253,6 +253,8 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFReg:
+ case AArch64::SEH_SaveAnyRegI:
+ case AArch64::SEH_SaveAnyRegIP:
case AArch64::SEH_SaveAnyRegQP:
case AArch64::SEH_SaveAnyRegQPX:
ImmOpnd = &MBBI->getOperand(ImmIdx);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3b268dcbca60..e1f43867bbe5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -375,6 +375,11 @@ def AArch64fclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
node:$Zm)
]>;
+def AArch64fdot : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
+ [(int_aarch64_sve_fdot_x2 node:$Zd, node:$Zn, node:$Zm),
+ (partial_reduce_fmla node:$Zd, node:$Zn, node:$Zm)
+ ]>;
+
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>, SDTCisSameAs<0,3>
@@ -4251,7 +4256,7 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
let Predicates = [HasSVE2p1_or_SME2] in {
defm FCLAMP_ZZZ : sve_fp_clamp<"fclamp", AArch64fclamp>;
-defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
+defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, AArch64fdot>;
defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1b559a628be0..f5081a9d2dd5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1248,7 +1248,8 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
SmallVector<EVT, 16> ValueVTs;
SmallVector<uint64_t, 16> Offsets;
- ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
+ ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, /*MemVTs=*/nullptr,
+ &Offsets, ArgOffset);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
index 65e6ed9d1d42..c52eb4e47768 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
@@ -57,10 +57,11 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
const UniformityInfo &UI,
ValueMap<const Value *, bool> &Tracker) {
llvm::Intrinsic::ID IID = II.getIntrinsicID();
-
+ /// We deliberately do not simplify readfirstlane with a uniform argument, so
+ /// that frontends can use it to force a copy to SGPR and thereby prevent the
+ /// backend from generating unwanted waterfall loops.
switch (IID) {
case Intrinsic::amdgcn_permlane64:
- case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane: {
Value *Src = II.getArgOperand(0);
if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
@@ -107,7 +108,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
return Changed;
}
default:
- llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic");
+ return false;
}
return false;
}
@@ -121,16 +122,6 @@ static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI) {
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
continue;
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_permlane64:
- case Intrinsic::amdgcn_readfirstlane:
- case Intrinsic::amdgcn_readlane:
- case Intrinsic::amdgcn_ballot:
- break;
- default:
- continue;
- }
IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker);
}
return IsChanged;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6ce18ea921a9..9c74c654d8e3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10163,7 +10163,7 @@ static bool followSubRegDef(MachineInstr &MI,
}
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
- MachineRegisterInfo &MRI) {
+ const MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
if (!P.Reg.isVirtual())
return nullptr;
@@ -10628,6 +10628,8 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
const SIRegisterInfo &RI) {
MachineInstr *KillsSCC = nullptr;
+ if (SCCValid->getParent() != SCCRedefine->getParent())
+ return false;
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
SCCRedefine->getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
@@ -10672,8 +10674,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (CmpValue != 0)
return false;
- MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
- if (!Def || Def->getParent() != CmpInstr.getParent())
+ MachineInstr *Def = MRI->getVRegDef(SrcReg);
+ if (!Def)
return false;
// For S_OP that set SCC = DST!=0, do the transformation
@@ -10692,6 +10694,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!optimizeSCC(Def, &CmpInstr, RI))
return false;
+ // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit
+ // s_cmp_lg of a register pair) and the inputs are the hi and lo-halves of a
+ // 64-bit foldableSelect then delete s_or_b32 in the sequence:
+ // sX = s_cselect_b64 (non-zero imm), 0
+ // sLo = copy sX.sub0
+ // sHi = copy sX.sub1
+ // sY = s_or_b32 sLo, sHi
+ if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
+ MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
+ const MachineOperand &OrOpnd1 = Def->getOperand(1);
+ const MachineOperand &OrOpnd2 = Def->getOperand(2);
+ if (OrOpnd1.isReg() && OrOpnd2.isReg()) {
+ MachineInstr *Def1 = MRI->getVRegDef(OrOpnd1.getReg());
+ MachineInstr *Def2 = MRI->getVRegDef(OrOpnd2.getReg());
+ if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
+ Def2->getOpcode() == AMDGPU::COPY && Def1->getOperand(1).isReg() &&
+ Def2->getOperand(1).isReg() &&
+ Def1->getOperand(1).getSubReg() == AMDGPU::sub0 &&
+ Def2->getOperand(1).getSubReg() == AMDGPU::sub1 &&
+ Def1->getOperand(1).getReg() == Def2->getOperand(1).getReg()) {
+ MachineInstr *Select = MRI->getVRegDef(Def1->getOperand(1).getReg());
+ if (Select && foldableSelect(*Select))
+ optimizeSCC(Select, Def, RI);
+ }
+ }
+ }
return true;
};
@@ -10721,8 +10749,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
// s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n
- MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
- if (!Def || Def->getParent() != CmpInstr.getParent())
+ MachineInstr *Def = MRI->getVRegDef(SrcReg);
+ if (!Def)
return false;
if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0643b532ea04..8d693b1b19dc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1687,7 +1687,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
- MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI);
/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 6e5520c3dbb1..3c61216cd932 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -803,26 +803,6 @@ SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
return getAddr(N, DAG);
}
-const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((BPFISD::NodeType)Opcode) {
- case BPFISD::FIRST_NUMBER:
- break;
- case BPFISD::RET_GLUE:
- return "BPFISD::RET_GLUE";
- case BPFISD::CALL:
- return "BPFISD::CALL";
- case BPFISD::SELECT_CC:
- return "BPFISD::SELECT_CC";
- case BPFISD::BR_CC:
- return "BPFISD::BR_CC";
- case BPFISD::Wrapper:
- return "BPFISD::Wrapper";
- case BPFISD::MEMCPY:
- return "BPFISD::MEMCPY";
- }
- return nullptr;
-}
-
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 5243d4944667..3d6e7c70df28 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -20,17 +20,6 @@
namespace llvm {
class BPFSubtarget;
-namespace BPFISD {
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_GLUE,
- CALL,
- SELECT_CC,
- BR_CC,
- Wrapper,
- MEMCPY
-};
-}
class BPFTargetLowering : public TargetLowering {
public:
@@ -39,9 +28,6 @@ public:
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- // This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
// This method decides whether folding a constant offset
// with the given GlobalAddress is legal.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 51c32b22510f..bdacf9cc3a6a 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -41,14 +41,12 @@ def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>;
+def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain]>;
def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>;
def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>;
def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
- SDNPMayStore, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
index 3e29e6c7ed38..0e6d35dd3781 100644
--- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -10,12 +10,20 @@
//
//===----------------------------------------------------------------------===//
+#include "BPFSelectionDAGInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
+
+#define GET_SDNODE_DESC
+#include "BPFGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "bpf-selectiondag-info"
+BPFSelectionDAGInfo::BPFSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(BPFGenSDNodeInfo) {}
+
SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
@@ -31,11 +39,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
if (StoresNumEstimate > getCommonMaxStoresPerMemFunc())
return SDValue();
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-
- Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src,
- DAG.getConstant(CopyLen, dl, MVT::i64),
- DAG.getConstant(Alignment.value(), dl, MVT::i64));
-
- return Dst.getValue(0);
+ return DAG.getNode(BPFISD::MEMCPY, dl, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(CopyLen, dl, MVT::i64),
+ DAG.getConstant(Alignment.value(), dl, MVT::i64));
}
diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
index 79f05e57bb5c..7345d2d7e473 100644
--- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
+++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
@@ -15,10 +15,15 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "BPFGenSDNodeInfo.inc"
+
namespace llvm {
-class BPFSelectionDAGInfo : public SelectionDAGTargetInfo {
+class BPFSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ BPFSelectionDAGInfo();
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
@@ -27,9 +32,8 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
unsigned getCommonMaxStoresPerMemFunc() const { return 128; }
-
};
-}
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index 3678f1335ca3..fa539a0a7b80 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -10,6 +10,7 @@ tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM BPFGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM BPFGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM BPFGenGlobalISel.inc -gen-global-isel)
tablegen(LLVM BPFGenRegisterBank.inc -gen-register-bank)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d507d71b99fc..9f1616f6960f 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
Type *NewGEPType = GOp->getSourceElementType();
- bool NeedsTransform = false;
// Unwrap GEP ConstantExprs to find the base operand and element type
- while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) {
- if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) {
- GOp = GEPCE;
- PtrOperand = GEPCE->getPointerOperand();
- NewGEPType = GEPCE->getSourceElementType();
- } else
- break;
+ while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
+ dyn_cast<ConstantExpr>(PtrOperand))) {
+ GOp = GEPCE;
+ PtrOperand = GEPCE->getPointerOperand();
+ NewGEPType = GEPCE->getSourceElementType();
}
+ Type *const OrigGEPType = NewGEPType;
+ Value *const OrigOperand = PtrOperand;
+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
NewGEPType = NewGlobal->getValueType();
PtrOperand = NewGlobal;
- NeedsTransform = true;
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
Type *AllocatedType = Alloca->getAllocatedType();
if (isa<ArrayType>(AllocatedType) &&
- AllocatedType != GOp->getResultElementType()) {
+ AllocatedType != GOp->getResultElementType())
NewGEPType = AllocatedType;
- NeedsTransform = true;
+ } else
+ return false; // Only GEPs into an alloca or global variable are considered
+
+ // Defer changing i8 GEP types until dxil-flatten-arrays
+ if (OrigGEPType->isIntegerTy(8))
+ NewGEPType = OrigGEPType;
+
+ // If the original type is a "sub-type" of the new type, then ensure the gep
+ // correctly zero-indexes the extra dimensions to keep the offset calculation
+ // correct.
+ // Eg:
+ // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
+ //
+ // So then:
+ // gep [4 x i32] %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 %idx
+ // gep i32 %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
+ uint32_t MissingDims = 0;
+ Type *SubType = NewGEPType;
+
+ // The new type will be in its array version; so match accordingly.
+ Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
+
+ while (SubType != GEPArrType) {
+ MissingDims++;
+
+ ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
+ if (!ArrType) {
+ assert(SubType == GEPArrType &&
+ "GEP uses an DXIL invalid sub-type of alloca/global variable");
+ break;
}
+
+ SubType = ArrType->getElementType();
}
+ bool NeedsTransform = OrigOperand != PtrOperand ||
+ OrigGEPType != NewGEPType || MissingDims != 0;
+
if (!NeedsTransform)
return false;
- // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later
- if (!isa<ArrayType>(GOp->getSourceElementType()))
- NewGEPType = GOp->getSourceElementType();
-
IRBuilder<> Builder(&GEPI);
- SmallVector<Value *, MaxVecSize> Indices(GOp->indices());
+ SmallVector<Value *, MaxVecSize> Indices;
+
+ for (uint32_t I = 0; I < MissingDims; I++)
+ Indices.push_back(Builder.getInt32(0));
+ llvm::append_range(Indices, GOp->indices());
+
Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ebb7c2607c0c..e0d2dbde9215 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) {
static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
+ case Intrinsic::assume:
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::exp:
@@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::abs:
Result = expandAbs(Orig);
break;
+ case Intrinsic::assume:
+ Orig->eraseFromParent();
+ return true;
case Intrinsic::atan2:
Result = expandAtan2Intrinsic(Orig);
break;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8720460cceb2..e46a393e5090 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -904,8 +904,6 @@ public:
case Intrinsic::dx_resource_casthandle:
// NOTE: llvm.dbg.value is supported as is in DXIL.
case Intrinsic::dbg_value:
- // NOTE: llvm.assume is supported as is in DXIL.
- case Intrinsic::assume:
case Intrinsic::not_intrinsic:
if (F.use_empty())
F.eraseFromParent();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 47726d6447ad..55bafdea234f 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
}
+bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
+ return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16 ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32);
+}
+
// Addressing mode relations.
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index c17e5277ae2e..48adf82833f5 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -532,6 +532,7 @@ public:
}
MCInst getNop() const override;
+ bool isQFPMul(const MachineInstr *MF) const;
};
/// \brief Create RegSubRegPair from a register MachineOperand
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
index f29a739cb5c0..8801f698effe 100644
--- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -58,7 +58,7 @@
// are PHI inst.
//
//===----------------------------------------------------------------------===//
-#include <unordered_set>
+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
#include "Hexagon.h"
@@ -86,6 +86,9 @@ using namespace llvm;
cl::opt<bool>
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
cl::desc("Disable optimization of Qfloat operations."));
+cl::opt<bool> DisableQFOptForMul(
+ "disable-qfp-opt-mul", cl::init(true),
+ cl::desc("Disable optimization of Qfloat operations for multiply."));
namespace {
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,11 +104,21 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
- {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
+ {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
+ {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
+ {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
+ {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
+ {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
+ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
} // namespace
-namespace {
+namespace llvm {
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+} // namespace llvm
+namespace {
struct HexagonQFPOptimizer : public MachineFunctionPass {
public:
static char ID;
@@ -116,6 +129,10 @@ public:
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() {
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
MachineBasicBlock *MBB) {
- // Early exit:
- // - if instruction is invalid or has too few operands (QFP ops need 2 sources
- // + 1 dest),
- // - or does not have a transformation mapping.
- if (MI->getNumOperands() < 3)
+ if (MI->getNumOperands() == 2)
+ return optimizeQfpOneOp(MI, MBB);
+ else if (MI->getNumOperands() == 3)
+ return optimizeQfpTwoOp(MI, MBB);
+ else
return false;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ unsigned Op0F = 0;
auto It = QFPInstMap.find(MI->getOpcode());
if (It == QFPInstMap.end())
return false;
+
unsigned short InstTy = It->second;
+ // Get the reachind defs of MI
+ MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());
+ MachineInstr *ReachDefDef = nullptr;
+
+ // Get the reaching def of the reaching def to check for W reg def
+ if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() &&
+ DefMI->getOperand(1).getReg().isVirtual())
+ ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg());
+ unsigned ReachDefOp = DefMI->getOpcode();
+ MachineInstrBuilder MIB;
+
+ // Check if the reaching def is a conversion
+ if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 ||
+ ReachDefOp == Hexagon::V6_vconv_hf_qf16) {
+
+ // Return if the reaching def of reaching def is W type
+ if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &SrcOp = DefMI->getOperand(1);
+ Op0F = getKillRegState(SrcOp.isKill());
+ SrcOp.setIsKill(false);
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+ return false;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
unsigned Op0F = 0;
unsigned Op1F = 0;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
// Get the reaching defs of MI, DefMI1 and DefMI2
MachineInstr *DefMI1 = nullptr;
MachineInstr *DefMI2 = nullptr;
@@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
return false;
MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
MachineInstr *Inst1 = nullptr;
MachineInstr *Inst2 = nullptr;
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
@@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
unsigned Def2OP = DefMI2->getOpcode();
MachineInstrBuilder MIB;
- // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+
+ // Check if the both the reaching defs of MI are qf to sf/hf conversions
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if left operand's reaching def is a conversion to sf/hf
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -250,7 +321,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if right operand's reaching def is a conversion to sf/hf
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
@@ -258,13 +329,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
!DefMI1->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
// The second operand of original instruction is converted.
- // In "mix" instructions, "qf" operand is always the first operand.
-
- // Caveat: vsub is not commutative w.r.t operands.
- if (InstTy == Hexagon::V6_vsub_qf16_mix ||
- InstTy == Hexagon::V6_vsub_qf32_mix)
- return false;
-
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
@@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
Op0F = getKillRegState(Src1.isKill());
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
- .addReg(Src2.getReg(), Op1F,
- Src2.getSubReg()) // Notice the operands are flipped.
- .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix) {
+ if (!HST->useHVXV81Ops())
+ // vsub_(hf|sf)_mix insts are only avlbl on hvx81+
+ return false;
+ // vsub is not commutative w.r.t. operands -> treat it as a special case
+ // to choose the correct mix instruction.
+ if (Def2OP == Hexagon::V6_vconv_sf_qf32)
+ InstTy = Hexagon::V6_vsub_sf_mix;
+ else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
+ InstTy = Hexagon::V6_vsub_hf_mix;
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ } else {
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ }
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
}
@@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
while (MII != MBBI->instr_end()) {
MachineInstr *MI = &*MII;
++MII; // As MI might be removed.
-
- if (QFPInstMap.count(MI->getOpcode()) &&
- MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
- MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
- LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
- if (optimizeQfp(MI, MBB)) {
- MI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "\t....Removing....");
- Changed = true;
+ if (QFPInstMap.count(MI->getOpcode())) {
+ auto OpC = MI->getOpcode();
+ if (DisableQFOptForMul && HII->isQFPMul(MI))
+ continue;
+ if (OpC != Hexagon::V6_vconv_sf_qf32 &&
+ OpC != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
}
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 90a4723c9a3e..9a35df2f240c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -378,12 +378,9 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}
-bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const {
- if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
- return true;
-
+bool LoongArchInstrInfo::isSafeToMove(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
auto MII = MI.getIterator();
auto MIE = MBB->end();
@@ -429,25 +426,25 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO2 = Lu32I->getOperand(2).getTargetFlags();
if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO &&
MO2 == LoongArchII::MO_PCREL64_LO)
- return true;
+ return false;
if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI ||
MO0 == LoongArchII::MO_GD_PC_HI) &&
MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO &&
MO2 == LoongArchII::MO_IE_PC64_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_DESC_PC_HI &&
MO1 == LoongArchII::MO_DESC_PC_LO &&
MO2 == LoongArchII::MO_DESC64_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::LU52I_D: {
auto MO = MI.getOperand(2).getTargetFlags();
if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI ||
MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI)
- return true;
+ return false;
break;
}
default:
@@ -487,7 +484,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2));
if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD)
- return true;
+ return false;
break;
}
if (SecondOp == MIE ||
@@ -496,34 +493,34 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp &&
MO1 == LoongArchII::MO_PCREL_LO)
- return true;
+ return false;
if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp &&
MO1 == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
if ((MO0 == LoongArchII::MO_LD_PC_HI ||
MO0 == LoongArchII::MO_GD_PC_HI) &&
SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::ADDI_W:
case LoongArch::ADDI_D: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::LD_W:
case LoongArch::LD_D: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_GOT_PC_LO)
- return true;
+ return false;
break;
}
case LoongArch::PseudoDESC_CALL: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
if (MO == LoongArchII::MO_DESC_CALL)
- return true;
+ return false;
break;
}
default:
@@ -531,6 +528,18 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
}
}
+ return true;
+}
+
+bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
+ return true;
+
+ if (!isSafeToMove(MI, MBB, MF))
+ return true;
+
return false;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f69a558bdeca..e61314c034bd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -64,6 +64,9 @@ public:
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
+ bool isSafeToMove(const MachineInstr &MI, const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2f1a7ad2d401..a3deb36074e6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -305,7 +305,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
- ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
+ ComputeValueVTs(TLI, DL, Ty, TempVTs, /*MemVTs=*/nullptr, &TempOffsets,
+ StartingOffset);
for (const auto [VT, Off] : zip(TempVTs, TempOffsets)) {
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 20fc849ea4aa..dd233e236e17 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -657,6 +657,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+ if (Subtarget.isISA3_0() && isPPC64) {
+ setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
+ setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
+ setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11917,6 +11928,62 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}
+// Adjust the length value for a load/store with length to account for the
+// instructions requiring a left justified length, and for non-byte element
+// types requiring scaling by element size.
+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
+ SelectionDAG &DAG) {
+ SDLoc dl(Val);
+ EVT VT = Val->getValueType(0);
+ unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
+ unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
+ SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
+ return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
+}
+
+SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
+ auto VPLD = cast<VPLoadSDNode>(Op);
+ bool Future = Subtarget.isISAFuture();
+ SDLoc dl(Op);
+ assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
+ "Mask predication not supported");
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
+ unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
+ unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ Len = AdjustLength(Len, EltBits, !Future, DAG);
+ SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+ VPLD->getOperand(1), Len};
+ SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDValue VPL =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
+ VPLD->getMemoryVT(), VPLD->getMemOperand());
+ return VPL;
+}
+
+SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
+ auto VPST = cast<VPStoreSDNode>(Op);
+ assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
+ "Mask predication not supported");
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
+ unsigned EltBits =
+ Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
+ bool Future = Subtarget.isISAFuture();
+ unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
+ Len = AdjustLength(Len, EltBits, !Future, DAG);
+ SDValue Ops[] = {
+ VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
+ VPST->getOperand(2), Len};
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue VPS =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+ VPST->getMemoryVT(), VPST->getMemOperand());
+ return VPS;
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -12771,6 +12838,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Op->getFlags().hasNoFPExcept())
return Op;
return SDValue();
+ case ISD::VP_LOAD:
+ return LowerVP_LOAD(Op, DAG);
+ case ISD::VP_STORE:
+ return LowerVP_STORE(Op, DAG);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca751d7d..d96701898273 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1345,6 +1345,9 @@ namespace llvm {
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b04e8874f58a..e74f1bdec800 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
+ cl::desc("Allow vp.load and vp.store for pwr9"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
@@ -1031,3 +1035,42 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
return TLI->supportsTailCallFor(CB);
}
+
+// Target hook used by CodeGen to decide whether to expand vector predication
+// intrinsics into scalar operations or to use special ISD nodes to represent
+// them. The Target will not see the intrinsics.
+TargetTransformInfo::VPLegalization
+PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+ using VPLegalization = TargetTransformInfo::VPLegalization;
+ unsigned Directive = ST->getCPUDirective();
+ VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
+ if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&
+ (!Pwr9EVL || Directive != PPC::DIR_PWR9))
+ return DefaultLegalization;
+
+ if (!ST->isPPC64())
+ return DefaultLegalization;
+
+ unsigned IID = PI.getIntrinsicID();
+ if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
+ return DefaultLegalization;
+
+ bool IsLoad = IID == Intrinsic::vp_load;
+ Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
+ EVT VT = TLI->getValueType(DL, VecTy, true);
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+ VT != MVT::v16i8)
+ return DefaultLegalization;
+
+ auto IsAllTrueMask = [](Value *MaskVal) {
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+ return false;
+ };
+ unsigned MaskIx = IsLoad ? 1 : 2;
+ if (!IsAllTrueMask(PI.getOperand(MaskIx)))
+ return DefaultLegalization;
+
+ return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 8d7f25539332..f80ebdbce7f6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -150,6 +150,9 @@ public:
ArrayRef<Type *> Types) const override;
bool supportsTailCallFor(const CallBase *CB) const override;
+ TargetTransformInfo::VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 995ae75da1c3..1977d3372c5f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16117,6 +16117,46 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+// (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
+// (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
+// (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
+// (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
+static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasCZEROLike())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto IsEqualCompZero = [](SDValue &V) -> bool {
+ if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
+ if (ISD::isIntEqualitySetCC(CC))
+ return true;
+ }
+ return false;
+ };
+
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ std::swap(N0, N1);
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ KnownBits Known = DAG.computeKnownBits(N1);
+ if (Known.getMaxValue().ugt(1))
+ return SDValue();
+
+ unsigned CzeroOpcode =
+ (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
+ ? RISCVISD::CZERO_EQZ
+ : RISCVISD::CZERO_NEZ;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
+}
+
static SDValue reduceANDOfAtomicLoad(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
-
+ if (DCI.isAfterLegalizeDAG())
+ if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
+ return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
@@ -16496,30 +16538,50 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY, bool AddX) {
+ unsigned ShY, bool AddX, unsigned Shift) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ // Put the shift first if we can fold a zext into the shift forming a slli.uw.
+ using namespace SDPatternMatch;
+ if (Shift != 0 &&
+ sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
+ X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
+ Shift = 0;
+ }
+ SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
+ if (ShX != 0)
+ ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
+ DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
+ if (Shift == 0)
+ return ShlAdd;
+ // Otherwise, put the shl last so that it can fold with following instructions
+ // (e.g. sext or add).
+ return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
- uint64_t MulAmt) {
- // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
+ uint64_t MulAmt, unsigned Shift) {
switch (MulAmt) {
+ // 3/5/9 -> (shYadd X, X)
+ case 3:
+ return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
+ case 5:
+ return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
+ case 9:
+ return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
default:
break;
}
@@ -16529,7 +16591,7 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
if (ShX <= 3)
- return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
+ return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
}
return SDValue();
}
@@ -16569,42 +16631,18 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// real regressions, and no other target properly freezes X in these cases
// either.
if (Subtarget.hasShlAdd(3)) {
- SDValue X = N->getOperand(0);
- int Shift;
- if (int ShXAmount = isShifted359(MulAmt, Shift)) {
- // 3/5/9 * 2^N -> shl (shXadd X, X), N
- SDLoc DL(N);
- // Put the shift first if we can fold a zext into the shift forming
- // a slli.uw.
- if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
- X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
- SDValue Shl =
- DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
- DAG.getTargetConstant(ShXAmount, DL, VT), Shl);
- }
- // Otherwise, put the shl second so that it can fold with following
- // instructions (e.g. sext or add).
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359,
- DAG.getConstant(Shift, DL, VT));
- }
-
+ // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
// (2/4/8 * 3/5/9 + 1) * 2^N
- Shift = llvm::countr_zero(MulAmt);
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
- if (Shift == 0)
- return V;
- SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
- }
+ unsigned Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
+ return V;
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this a sum of two power of 2s because that's
// easy. Then count how many zeros are up to the first bit.
+ SDValue X = N->getOperand(0);
if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
SDLoc DL(N);
@@ -17867,6 +17905,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
SmallVector<SDNode *> Worklist;
SmallPtrSet<SDNode *, 8> Inserted;
+ SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
Worklist.push_back(N);
Inserted.insert(N);
SmallVector<CombineResult> CombinesToApply;
@@ -17876,22 +17915,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
- auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
- &Inserted](const NodeExtensionHelper &Op) {
- if (Op.needToPromoteOtherUsers()) {
- for (SDUse &Use : Op.OrigOperand->uses()) {
- SDNode *TheUser = Use.getUser();
- if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
- return false;
- // We only support the first 2 operands of FMA.
- if (Use.getOperandNo() >= 2)
- return false;
- if (Inserted.insert(TheUser).second)
- Worklist.push_back(TheUser);
- }
- }
- return true;
- };
+ auto AppendUsersIfNeeded =
+ [&Worklist, &Subtarget, &Inserted,
+ &ExtensionsToRemove](const NodeExtensionHelper &Op) {
+ if (Op.needToPromoteOtherUsers()) {
+ // Remember that we're supposed to remove this extension.
+ ExtensionsToRemove.insert(Op.OrigOperand.getNode());
+ for (SDUse &Use : Op.OrigOperand->uses()) {
+ SDNode *TheUser = Use.getUser();
+ if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
+ return false;
+ // We only support the first 2 operands of FMA.
+ if (Use.getOperandNo() >= 2)
+ return false;
+ if (Inserted.insert(TheUser).second)
+ Worklist.push_back(TheUser);
+ }
+ }
+ return true;
+ };
// Control the compile time by limiting the number of node we look at in
// total.
@@ -17912,6 +17954,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
std::optional<CombineResult> Res =
FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
if (Res) {
+ // If this strategy wouldn't remove an extension we're supposed to
+ // remove, reject it.
+ if (!Res->LHSExt.has_value() &&
+ ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
+ continue;
+ if (!Res->RHSExt.has_value() &&
+ ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
+ continue;
+
Matched = true;
CombinesToApply.push_back(*Res);
// All the inputs that are extended need to be folded, otherwise
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b8ab70bd9e38..b05956b674d1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -3526,6 +3527,27 @@ RISCVInstrInfo::getOutliningCandidateInfo(
Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtZca() ? 2 : 4;
unsigned CallOverhead = 0, FrameOverhead = 0;
+ // Count the number of CFI instructions in the candidate, if present.
+ unsigned CFICount = 0;
+ for (auto &I : Candidate) {
+ if (I.isCFIInstruction())
+ CFICount++;
+ }
+
+ // Ensure CFI coverage matches: comparing the number of CFIs in the candidate
+ // with the total number of CFIs in the parent function for each candidate.
+ // Outlining only a subset of a function’s CFIs would split the unwind state
+ // across two code regions and lead to incorrect address offsets between the
+ // outlined body and the remaining code. To preserve correct unwind info, we
+ // only outline when all CFIs in the function can be outlined together.
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ std::vector<MCCFIInstruction> CFIInstructions =
+ C.getMF()->getFrameInstructions();
+
+ if (CFICount > 0 && CFICount != CFIInstructions.size())
+ return std::nullopt;
+ }
+
MachineOutlinerConstructionID MOCI = MachineOutlinerDefault;
if (Candidate.back().isReturn()) {
MOCI = MachineOutlinerTailCall;
@@ -3541,6 +3563,11 @@ RISCVInstrInfo::getOutliningCandidateInfo(
FrameOverhead = InstrSizeCExt;
}
+ // If we have CFI instructions, we can only outline if the outlined section
+ // can be a tail call.
+ if (MOCI != MachineOutlinerTailCall && CFICount > 0)
+ return std::nullopt;
+
for (auto &C : RepeatedSequenceLocs)
C.setCallInfo(MOCI, CallOverhead);
@@ -3562,13 +3589,11 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
MBB->getParent()->getSubtarget().getRegisterInfo();
const auto &F = MI.getMF()->getFunction();
- // We can manually strip out CFI instructions later.
+ // We can only outline CFI instructions if we will tail call the outlined
+ // function, or fix up the CFI offsets. Currently, CFI instructions are
+ // outlined only if in a tail call.
if (MI.isCFIInstruction())
- // If current function has exception handling code, we can't outline &
- // strip these CFI instructions since it may break .eh_frame section
- // needed in unwinding.
- return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
- : outliner::InstrType::Invisible;
+ return outliner::InstrType::Legal;
if (cannotInsertTailCall(*MBB) &&
(MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5)))
@@ -3595,21 +3620,6 @@ void RISCVInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
- // Strip out any CFI instructions
- bool Changed = true;
- while (Changed) {
- Changed = false;
- auto I = MBB.begin();
- auto E = MBB.end();
- for (; I != E; ++I) {
- if (I->isCFIInstruction()) {
- I->removeFromParent();
- Changed = true;
- break;
- }
- }
- }
-
if (OF.FrameConstructionID == MachineOutlinerTailCall)
return;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 24ebbc3007ce..41071b29e5c9 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -654,8 +654,17 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+
+ // Pattern for vredsum: 5/5/5/7/11/19/35
+ // Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34
+ // They are grouped together, so we use the worst-case vredsum latency.
+ // TODO: split vredand, vredor, vredxor into separate scheduling classe.
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ }
}
}
@@ -663,7 +672,27 @@ foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+
+ // Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57
+ // Latency for vfredusum.vs is slightly lower for e16/e32
+ // We use the worst-case
+ defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c;
+ defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c;
+ let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -671,9 +700,20 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // Compute latency based on SEW
+ defvar VFRedOV_FromLat = !cond(
+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c,
+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c,
+ !eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c
+ );
+ defvar VFRedOV_FromOcc = !cond(
+ !eq(sew, 16) : GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c,
+ !eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c,
+ !eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c
+ );
+ let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -681,8 +721,18 @@ foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defvar VFRedOVLat = !cond(
+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c,
+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c,
+ );
+ defvar VFRedOVOcc = !cond(
+ !eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c,
+ !eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c,
+ );
+ let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index a151fd2fbdb7..599cc35ca2e9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -767,6 +767,8 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
Type *RefTy = deduceElementTypeHelper(Ref->getPointerOperand(), Visited,
UnknownElemTypeI8);
maybeAssignPtrType(Ty, I, RefTy, UnknownElemTypeI8);
+ } else if (auto *Ref = dyn_cast<IntToPtrInst>(I)) {
+ maybeAssignPtrType(Ty, I, Ref->getDestTy(), UnknownElemTypeI8);
} else if (auto *Ref = dyn_cast<BitCastInst>(I)) {
if (Type *Src = Ref->getSrcTy(), *Dest = Ref->getDestTy();
isPointerTy(Src) && isPointerTy(Dest))
@@ -2149,7 +2151,9 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I,
for (const auto &Op : I->operands()) {
if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
// Check GetElementPtrConstantExpr case.
- (isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
+ (isa<ConstantExpr>(Op) &&
+ (isa<GEPOperator>(Op) ||
+ (cast<ConstantExpr>(Op)->getOpcode() == CastInst::IntToPtr)))) {
setInsertPointSkippingPhis(B, I);
Type *OpTy = Op->getType();
if (isa<UndefValue>(Op) && OpTy->isAggregateType()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 245e5a289460..fc87288a4a21 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -1210,8 +1210,16 @@ bool SPIRVInstructionSelector::selectUnOp(Register ResVReg,
for (MachineRegisterInfo::def_instr_iterator DefIt =
MRI->def_instr_begin(SrcReg);
DefIt != MRI->def_instr_end(); DefIt = std::next(DefIt)) {
- if ((*DefIt).getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
- (*DefIt).getOpcode() == SPIRV::OpVariable) {
+ unsigned DefOpCode = DefIt->getOpcode();
+ if (DefOpCode == SPIRV::ASSIGN_TYPE) {
+ // We need special handling to look through the type assignment and see
+ // if this is a constant or a global
+ if (auto *VRD = getVRegDef(*MRI, DefIt->getOperand(1).getReg()))
+ DefOpCode = VRD->getOpcode();
+ }
+ if (DefOpCode == TargetOpcode::G_GLOBAL_VALUE ||
+ DefOpCode == TargetOpcode::G_CONSTANT ||
+ DefOpCode == SPIRV::OpVariable || DefOpCode == SPIRV::OpConstantI) {
IsGV = true;
break;
}
@@ -3099,9 +3107,10 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp(
SmallPtrSet<SPIRVType *, 4> Visited;
if (!OpDefine || !OpType || isConstReg(MRI, OpDefine, Visited) ||
OpDefine->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST ||
+ OpDefine->getOpcode() == TargetOpcode::G_INTTOPTR ||
GR.isAggregateType(OpType)) {
// The case of G_ADDRSPACE_CAST inside spv_const_composite() is processed
- // by selectAddrSpaceCast()
+ // by selectAddrSpaceCast(), and G_INTTOPTR is processed by selectUnOp()
CompositeArgs.push_back(OpReg);
continue;
}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index fa23656e23fc..2b83d575ace9 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86_H
#define LLVM_LIB_TARGET_X86_X86_H
+#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
#include "llvm/IR/Analysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
@@ -104,7 +105,16 @@ FunctionPass *createX86LowerTileCopyPass();
/// CALL instruction. The pass does the same for each funclet as well. This
/// ensures that the open interval of function start and end PCs contains all
/// return addresses for the benefit of the Windows x64 unwinder.
-FunctionPass *createX86AvoidTrailingCallPass();
+class X86AvoidTrailingCallPass
+ : public PassInfoMixin<X86AvoidTrailingCallPass> {
+public:
+ X86AvoidTrailingCallPass() = default;
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86AvoidTrailingCallLegacyPass();
/// Return a pass that optimizes the code-size of x86 call sequences. This is
/// done by replacing esp-relative movs with pushes.
@@ -222,7 +232,7 @@ void initializeX86FixupInstTuningPassPass(PassRegistry &);
void initializeX86FixupVectorConstantsPassPass(PassRegistry &);
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
-void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
+void initializeX86AvoidTrailingCallLegacyPassPass(PassRegistry &);
void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
void initializeX86DAGToDAGISelLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
index 2ecf49382d29..ebd4284f0f37 100644
--- a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
+++ b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
@@ -37,6 +37,8 @@
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/PassManager.h"
#define AVOIDCALL_DESC "X86 avoid trailing call pass"
#define AVOIDCALL_NAME "x86-avoid-trailing-call"
@@ -46,9 +48,9 @@
using namespace llvm;
namespace {
-class X86AvoidTrailingCallPass : public MachineFunctionPass {
+class X86AvoidTrailingCallLegacyPass : public MachineFunctionPass {
public:
- X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {}
+ X86AvoidTrailingCallLegacyPass() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -59,13 +61,14 @@ private:
};
} // end anonymous namespace
-char X86AvoidTrailingCallPass::ID = 0;
+char X86AvoidTrailingCallLegacyPass::ID = 0;
-FunctionPass *llvm::createX86AvoidTrailingCallPass() {
- return new X86AvoidTrailingCallPass();
+FunctionPass *llvm::createX86AvoidTrailingCallLegacyPass() {
+ return new X86AvoidTrailingCallLegacyPass();
}
-INITIALIZE_PASS(X86AvoidTrailingCallPass, AVOIDCALL_NAME, AVOIDCALL_DESC, false, false)
+INITIALIZE_PASS(X86AvoidTrailingCallLegacyPass, AVOIDCALL_NAME, AVOIDCALL_DESC,
+ false, false)
// A real instruction is a non-meta, non-pseudo instruction. Some pseudos
// expand to nothing, and some expand to code. This logic conservatively assumes
@@ -79,7 +82,7 @@ static bool isCallInstruction(const MachineInstr &MI) {
return MI.isCall() && !MI.isReturn();
}
-bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
+bool UpdatedOnX86AvoidTrailingCallPass(MachineFunction &MF) {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86InstrInfo &TII = *STI.getInstrInfo();
assert(STI.isTargetWin64() && "pass only runs on Win64");
@@ -134,3 +137,19 @@ bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+bool X86AvoidTrailingCallLegacyPass::runOnMachineFunction(MachineFunction &MF) {
+ return UpdatedOnX86AvoidTrailingCallPass(MF);
+}
+
+PreservedAnalyses
+X86AvoidTrailingCallPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ bool Changed = UpdatedOnX86AvoidTrailingCallPass(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 168e04109a0a..05a854a0bf3f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3454,6 +3454,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
return true;
+ // If we have a large vector type (even if illegal), don't bitcast to large
+ // (illegal) scalar types. Better to load fewer vectors and extract.
+ if (LoadVT.isVector() && !BitcastVT.isVector() && LoadVT.isInteger() &&
+ BitcastVT.isInteger() && (LoadVT.getSizeInBits() % 128) == 0)
+ return false;
+
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
@@ -45016,11 +45022,16 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
+ case X86ISD::VZEXT_MOVL:
case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFP:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPV:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3: {
SmallVector<int, 8> Mask;
@@ -45046,6 +45057,16 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
}
break;
}
+ case X86ISD::VBROADCAST: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (SrcVT.isVector()) {
+ APInt DemandedSrc = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
+ return DAG.isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrc, PoisonOnly,
+ Depth + 1);
+ }
+ return DAG.isGuaranteedNotToBeUndefOrPoison(Src, PoisonOnly, Depth + 1);
+ }
}
return TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, Depth);
@@ -45090,13 +45111,19 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
// SSE target shuffles.
case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
+ case X86ISD::VZEXT_MOVL:
case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFP:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPV:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
+ case X86ISD::VBROADCAST:
return false;
// SSE comparisons handle all icmp/fcmp cases.
// TODO: Add CMPM/MM with test coverage.
@@ -53354,6 +53381,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
// i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
SDValue StoredVal = St->getValue();
@@ -53451,6 +53479,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (!StoredVal.hasOneUse()) {
SDValue NewLoad =
DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand());
+ for (SDNode *User : StoredVal->users())
+ DCI.AddToWorklist(User);
DAG.ReplaceAllUsesWith(StoredVal, NewLoad);
}
return NewStore;
@@ -53682,7 +53712,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget))
return R;
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index db255940f882..52463622026d 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -29,6 +29,7 @@ DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass())
MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
#undef MACHINE_FUNCTION_PASS
@@ -36,7 +37,6 @@ MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME)
#endif
DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-SFB", X86AvoidSFBPass())
-DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-cf-opt", X86CallFrameOptimization())
DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS())
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 5f0bcab251e6..0c2bd7c302f3 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -90,7 +90,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
- initializeX86AvoidTrailingCallPassPass(PR);
+ initializeX86AvoidTrailingCallLegacyPassPass(PR);
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
@@ -589,7 +589,7 @@ void X86PassConfig::addPreEmitPass2() {
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
if (TT.isOSWindows() && TT.isX86_64())
- addPass(createX86AvoidTrailingCallPass());
+ addPass(createX86AvoidTrailingCallLegacyPass());
// Verify basic block incoming and outgoing cfa offset and register values and
// correct CFA calculation rule where needed by inserting appropriate CFI
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index af53fa0bae46..02f06bebb8f0 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -734,7 +734,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
// Reserve bit 60-63 for other information purpose.
- FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK;
if (IsCS)
NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 906fa2f857c2..b7224a33f47b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7933,6 +7933,26 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
(!Chain.ExtendB || ExtendIsOnlyUsedByPartialReductions(Chain.ExtendB)))
ScaledReductionMap.try_emplace(Chain.Reduction, Pair.second);
}
+
+ // Check that all partial reductions in a chain are only used by other
+ // partial reductions with the same scale factor. Otherwise we end up creating
+ // users of scaled reductions where the types of the other operands don't
+ // match.
+ for (const auto &[Chain, Scale] : PartialReductionChains) {
+ auto AllUsersPartialRdx = [ScaleVal = Scale, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<PHINode>(UI) && UI->getParent() == OrigLoop->getHeader()) {
+ return all_of(UI->users(), [ScaleVal, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal;
+ });
+ }
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal ||
+ !OrigLoop->contains(UI->getParent());
+ };
+ if (!all_of(Chain.Reduction->users(), AllUsersPartialRdx))
+ ScaledReductionMap.erase(Chain.Reduction);
+ }
}
bool VPRecipeBuilder::getScaledReductions(
@@ -8116,11 +8136,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
return tryToWidenMemory(Instr, Operands, Range);
- if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) {
- if (auto PartialRed =
- tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()))
- return PartialRed;
- }
+ if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr))
+ return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
if (!shouldWiden(Instr, Range))
return nullptr;
@@ -8154,9 +8171,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
isa<VPPartialReductionRecipe>(BinOpRecipe))
std::swap(BinOp, Accumulator);
- if (ScaleFactor !=
- vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()))
- return nullptr;
+ assert(ScaleFactor ==
+ vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
+ "all accumulators in chain must have same scale factor");
unsigned ReductionOpcode = Reduction->getOpcode();
if (ReductionOpcode == Instruction::Sub) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf3f52c51b64..df835a077f2a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20996,6 +20996,15 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
return false;
}))
return std::nullopt;
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->hasCopyableElements() &&
+ EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
+ all_of(VL, [&](Value *V) {
+ if (S.isCopyableElement(V))
+ return true;
+ return isUsedOutsideBlock(V);
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3062e1ca26af..5851b3ab7978 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1762,15 +1762,11 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
return getOperand(0);
}
- bool isInvariantCond() const {
- return getCond()->isDefinedOutsideLoopRegions();
- }
-
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- return Op == getCond() && isInvariantCond();
+ return Op == getCond() && Op->isDefinedOutsideLoopRegions();
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 1a66d2049a8d..f405c40611fc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -823,7 +823,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
};
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
- VPReductionPHIRecipe *RedPhiR = nullptr;
+ SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>>
+ MinMaxNumReductionsToHandle;
bool HasUnsupportedPhi = false;
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
@@ -834,19 +835,20 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
HasUnsupportedPhi = true;
continue;
}
- // For now, only a single reduction is supported.
- // TODO: Support multiple MaxNum/MinNum reductions and other reductions.
- if (RedPhiR)
- return false;
if (!RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
Cur->getRecurrenceKind())) {
HasUnsupportedPhi = true;
continue;
}
- RedPhiR = Cur;
+
+ VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
+ if (!MinMaxOp)
+ return false;
+
+ MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
}
- if (!RedPhiR)
+ if (MinMaxNumReductionsToHandle.empty())
return true;
// We won't be able to resume execution in the scalar tail, if there are
@@ -855,14 +857,6 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (HasUnsupportedPhi || !Plan.hasScalarTail())
return false;
- VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
- if (!MinMaxOp)
- return false;
-
- assert(RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
- RedPhiR->getRecurrenceKind()) &&
- "unsupported reduction");
-
/// Check if the vector loop of \p Plan can early exit and restart
/// execution of last vector iteration in the scalar loop. This requires all
/// recipes up to early exit point be side-effect free as they are
@@ -879,52 +873,68 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
}
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
- VPBuilder Builder(LatchVPBB->getTerminator());
- auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
- assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
+ VPBuilder LatchBuilder(LatchVPBB->getTerminator());
+ VPValue *AllNaNLanes = nullptr;
+ SmallPtrSet<VPValue *, 2> RdxResults;
+ for (const auto &[_, MinMaxOp] : MinMaxNumReductionsToHandle) {
+ VPValue *RedNaNLanes =
+ LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
+ AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
+ : RedNaNLanes;
+ }
+
+ VPValue *AnyNaNLane =
+ LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
+ VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
+ VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
+ for (const auto &[RedPhiR, _] : MinMaxNumReductionsToHandle) {
+ assert(RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
+ RedPhiR->getRecurrenceKind()) &&
+ "unsupported reduction");
+
+ // If we exit early due to NaNs, compute the final reduction result based on
+ // the reduction phi at the beginning of the last vector iteration.
+ auto *RdxResult = find_singleton<VPSingleDefRecipe>(
+ RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
+ return VPI;
+ return nullptr;
+ });
+
+ auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
+ RdxResult->getOperand(1));
+ RdxResult->setOperand(1, NewSel);
+ assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
+ RdxResults.insert(RdxResult);
+ }
+
+ auto *LatchExitingBranch = LatchVPBB->getTerminator();
+ assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
"Unexpected terminator");
- auto *IsLatchExitTaken =
- Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
- LatchExitingBranch->getOperand(1));
-
- VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
- VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
- auto *AnyExitTaken =
- Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
- Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
+ auto *IsLatchExitTaken = LatchBuilder.createICmp(
+ CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
+ LatchExitingBranch->getOperand(1));
+ auto *AnyExitTaken = LatchBuilder.createNaryOp(
+ Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
+ LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
LatchExitingBranch->eraseFromParent();
- // If we exit early due to NaNs, compute the final reduction result based on
- // the reduction phi at the beginning of the last vector iteration.
- auto *RdxResult = find_singleton<VPSingleDefRecipe>(
- RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
- auto *VPI = dyn_cast<VPInstruction>(U);
- if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
- return VPI;
- return nullptr;
- });
-
- auto *MiddleVPBB = Plan.getMiddleBlock();
- Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
- auto *NewSel =
- Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
- RdxResult->setOperand(1, NewSel);
-
- auto *ScalarPH = Plan.getScalarPreheader();
- // Update resume phis for inductions in the scalar preheader. If AnyNaN is
+ // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
// true, the resume from the start of the last vector iteration via the
// canonical IV, otherwise from the original value.
- for (auto &R : ScalarPH->phis()) {
+ for (auto &R : Plan.getScalarPreheader()->phis()) {
auto *ResumeR = cast<VPPhi>(&R);
VPValue *VecV = ResumeR->getOperand(0);
- if (VecV == RdxResult)
+ if (RdxResults.contains(VecV))
continue;
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
- auto *NewSel = Builder.createSelect(
- AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
- DerivedIV->moveAfter(&*Builder.getInsertPoint());
+ auto *NewSel =
+ MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
+ &Plan.getVectorTripCount());
+ DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
}
@@ -936,15 +946,16 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
"FMaxNum/FMinNum reduction.\n");
return false;
}
- auto *NewSel =
- Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
+ auto *NewSel = MiddleBuilder.createSelect(
+ AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}
auto *MiddleTerm = MiddleVPBB->getTerminator();
- Builder.setInsertPoint(MiddleTerm);
+ MiddleBuilder.setInsertPoint(MiddleTerm);
VPValue *MiddleCond = MiddleTerm->getOperand(0);
- VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
+ VPValue *NewCond =
+ MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
MiddleTerm->setOperand(0, NewCond);
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 48bd697397f4..634df51a1296 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1288,8 +1288,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// Look through broadcast of single-scalar when used as select conditions; in
// that case the scalar condition can be used directly.
if (match(Def,
- m_Select(m_Broadcast(m_VPValue(C)), m_VPValue(), m_VPValue())) &&
- vputils::isSingleScalar(C)) {
+ m_Select(m_Broadcast(m_VPValue(C)), m_VPValue(), m_VPValue()))) {
+ assert(vputils::isSingleScalar(C) &&
+ "broadcast operand must be single-scalar");
Def->setOperand(0, C);
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 27a8bbd5776b..ed3a0a0ab023 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -696,11 +696,11 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
/// shuffle.
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
// Match an insert (op (extract)) pattern.
- Value *DestVec;
- uint64_t Index;
+ Value *DstVec;
+ uint64_t ExtIdx, InsIdx;
Instruction *FNeg;
- if (!match(&I, m_InsertElt(m_Value(DestVec), m_OneUse(m_Instruction(FNeg)),
- m_ConstantInt(Index))))
+ if (!match(&I, m_InsertElt(m_Value(DstVec), m_OneUse(m_Instruction(FNeg)),
+ m_ConstantInt(InsIdx))))
return false;
// Note: This handles the canonical fneg instruction and "fsub -0.0, X".
@@ -708,67 +708,74 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
Instruction *Extract;
if (!match(FNeg, m_FNeg(m_CombineAnd(
m_Instruction(Extract),
- m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
+ m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx))))))
return false;
- auto *VecTy = cast<FixedVectorType>(I.getType());
- auto *ScalarTy = VecTy->getScalarType();
+ auto *DstVecTy = cast<FixedVectorType>(DstVec->getType());
+ auto *DstVecScalarTy = DstVecTy->getScalarType();
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
- if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
+ if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
return false;
- // Ignore bogus insert/extract index.
- unsigned NumElts = VecTy->getNumElements();
- if (Index >= NumElts)
+ // Ignore if insert/extract index is out of bounds or destination vector has
+ // one element
+ unsigned NumDstElts = DstVecTy->getNumElements();
+ unsigned NumSrcElts = SrcVecTy->getNumElements();
+ if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
return false;
// We are inserting the negated element into the same lane that we extracted
// from. This is equivalent to a select-shuffle that chooses all but the
// negated element from the destination vector.
- SmallVector<int> Mask(NumElts);
+ SmallVector<int> Mask(NumDstElts);
std::iota(Mask.begin(), Mask.end(), 0);
- Mask[Index] = Index + NumElts;
+ Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
InstructionCost OldCost =
- TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy, CostKind) +
- TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
+ TTI.getArithmeticInstrCost(Instruction::FNeg, DstVecScalarTy, CostKind) +
+ TTI.getVectorInstrCost(I, DstVecTy, CostKind, InsIdx);
// If the extract has one use, it will be eliminated, so count it in the
// original cost. If it has more than one use, ignore the cost because it will
// be the same before/after.
if (Extract->hasOneUse())
- OldCost += TTI.getVectorInstrCost(*Extract, VecTy, CostKind, Index);
+ OldCost += TTI.getVectorInstrCost(*Extract, SrcVecTy, CostKind, ExtIdx);
InstructionCost NewCost =
- TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy, CostKind) +
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy, VecTy,
- Mask, CostKind);
+ TTI.getArithmeticInstrCost(Instruction::FNeg, SrcVecTy, CostKind) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, DstVecTy,
+ DstVecTy, Mask, CostKind);
- bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
+ bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
// If the lengths of the two vectors are not equal,
// we need to add a length-change vector. Add this cost.
SmallVector<int> SrcMask;
if (NeedLenChg) {
- SrcMask.assign(NumElts, PoisonMaskElem);
- SrcMask[Index] = Index;
+ SrcMask.assign(NumDstElts, PoisonMaskElem);
+ SrcMask[ExtIdx % NumDstElts] = ExtIdx;
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- VecTy, SrcVecTy, SrcMask, CostKind);
+ DstVecTy, SrcVecTy, SrcMask, CostKind);
}
+ LLVM_DEBUG(dbgs() << "Found an insertion of (extract)fneg : " << I
+ << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
+ << "\n");
if (NewCost > OldCost)
return false;
- Value *NewShuf;
- // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
+ Value *NewShuf, *LenChgShuf = nullptr;
+ // insertelt DstVec, (fneg (extractelt SrcVec, Index)), Index
Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
if (NeedLenChg) {
- // shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
- Value *LenChgShuf = Builder.CreateShuffleVector(VecFNeg, SrcMask);
- NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
+ // shuffle DstVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
+ LenChgShuf = Builder.CreateShuffleVector(VecFNeg, SrcMask);
+ NewShuf = Builder.CreateShuffleVector(DstVec, LenChgShuf, Mask);
+ Worklist.pushValue(LenChgShuf);
} else {
- // shuffle DestVec, (fneg SrcVec), Mask
- NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
+ // shuffle DstVec, (fneg SrcVec), Mask
+ NewShuf = Builder.CreateShuffleVector(DstVec, VecFNeg, Mask);
}
+ Worklist.pushValue(VecFNeg);
replaceValue(I, *NewShuf);
return true;
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
index 1c4035489219..ec848c2c0830 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
@@ -5,214 +5,214 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @fadd() {
; CHECK-LABEL: 'fadd'
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fadd <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <vscale x 4 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <vscale x 8 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <vscale x 16 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fadd <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <vscale x 4 x double> poison, poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = fadd <vscale x 4 x half> undef, undef
- %V8F16 = fadd <vscale x 8 x half> undef, undef
- %V16F16 = fadd <vscale x 16 x half> undef, undef
+ %V4F16 = fadd <vscale x 4 x half> poison, poison
+ %V8F16 = fadd <vscale x 8 x half> poison, poison
+ %V16F16 = fadd <vscale x 16 x half> poison, poison
- %V1F32 = fadd <vscale x 1 x float> undef, undef
- %V2F32 = fadd <vscale x 2 x float> undef, undef
- %V4F32 = fadd <vscale x 4 x float> undef, undef
- %V8F32 = fadd <vscale x 8 x float> undef, undef
+ %V1F32 = fadd <vscale x 1 x float> poison, poison
+ %V2F32 = fadd <vscale x 2 x float> poison, poison
+ %V4F32 = fadd <vscale x 4 x float> poison, poison
+ %V8F32 = fadd <vscale x 8 x float> poison, poison
- %V2F64 = fadd <vscale x 2 x double> undef, undef
- %V4F64 = fadd <vscale x 4 x double> undef, undef
+ %V2F64 = fadd <vscale x 2 x double> poison, poison
+ %V4F64 = fadd <vscale x 4 x double> poison, poison
ret void
}
define void @fsub() {
; CHECK-LABEL: 'fsub'
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fsub <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <vscale x 4 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <vscale x 8 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <vscale x 16 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fsub <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <vscale x 4 x double> poison, poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = fsub <vscale x 4 x half> undef, undef
- %V8F16 = fsub <vscale x 8 x half> undef, undef
- %V16F16 = fsub <vscale x 16 x half> undef, undef
+ %V4F16 = fsub <vscale x 4 x half> poison, poison
+ %V8F16 = fsub <vscale x 8 x half> poison, poison
+ %V16F16 = fsub <vscale x 16 x half> poison, poison
- %V1F32 = fsub <vscale x 1 x float> undef, undef
- %V2F32 = fsub <vscale x 2 x float> undef, undef
- %V4F32 = fsub <vscale x 4 x float> undef, undef
- %V8F32 = fsub <vscale x 8 x float> undef, undef
+ %V1F32 = fsub <vscale x 1 x float> poison, poison
+ %V2F32 = fsub <vscale x 2 x float> poison, poison
+ %V4F32 = fsub <vscale x 4 x float> poison, poison
+ %V8F32 = fsub <vscale x 8 x float> poison, poison
- %V2F64 = fsub <vscale x 2 x double> undef, undef
- %V4F64 = fsub <vscale x 4 x double> undef, undef
+ %V2F64 = fsub <vscale x 2 x double> poison, poison
+ %V4F64 = fsub <vscale x 4 x double> poison, poison
ret void
}
define void @fneg() {
; CHECK-LABEL: 'fneg'
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <vscale x 2 x half> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <vscale x 4 x half> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <vscale x 8 x half> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <vscale x 16 x half> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <vscale x 2 x float> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <vscale x 4 x float> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <vscale x 8 x float> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <vscale x 2 x double> undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <vscale x 4 x double> undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <vscale x 2 x half> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <vscale x 4 x half> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <vscale x 8 x half> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <vscale x 16 x half> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <vscale x 2 x float> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <vscale x 4 x float> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <vscale x 8 x float> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <vscale x 2 x double> poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <vscale x 4 x double> poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V2F16 = fneg <vscale x 2 x half> undef
- %V4F16 = fneg <vscale x 4 x half> undef
- %V8F16 = fneg <vscale x 8 x half> undef
- %V16F16 = fneg <vscale x 16 x half> undef
+ %V2F16 = fneg <vscale x 2 x half> poison
+ %V4F16 = fneg <vscale x 4 x half> poison
+ %V8F16 = fneg <vscale x 8 x half> poison
+ %V16F16 = fneg <vscale x 16 x half> poison
- %V2F32 = fneg <vscale x 2 x float> undef
- %V4F32 = fneg <vscale x 4 x float> undef
- %V8F32 = fneg <vscale x 8 x float> undef
+ %V2F32 = fneg <vscale x 2 x float> poison
+ %V4F32 = fneg <vscale x 4 x float> poison
+ %V8F32 = fneg <vscale x 8 x float> poison
- %V2F64 = fneg <vscale x 2 x double> undef
- %V4F64 = fneg <vscale x 4 x double> undef
+ %V2F64 = fneg <vscale x 2 x double> poison
+ %V4F64 = fneg <vscale x 4 x double> poison
ret void
}
define void @fmul() {
; CHECK-LABEL: 'fmul'
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <vscale x 4 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <vscale x 8 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <vscale x 16 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <vscale x 4 x double> poison, poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = fmul <vscale x 4 x half> undef, undef
- %V8F16 = fmul <vscale x 8 x half> undef, undef
- %V16F16 = fmul <vscale x 16 x half> undef, undef
+ %V4F16 = fmul <vscale x 4 x half> poison, poison
+ %V8F16 = fmul <vscale x 8 x half> poison, poison
+ %V16F16 = fmul <vscale x 16 x half> poison, poison
- %V2F32 = fmul <vscale x 2 x float> undef, undef
- %V4F32 = fmul <vscale x 4 x float> undef, undef
- %V8F32 = fmul <vscale x 8 x float> undef, undef
+ %V2F32 = fmul <vscale x 2 x float> poison, poison
+ %V4F32 = fmul <vscale x 4 x float> poison, poison
+ %V8F32 = fmul <vscale x 8 x float> poison, poison
- %V2F64 = fmul <vscale x 2 x double> undef, undef
- %V4F64 = fmul <vscale x 4 x double> undef, undef
+ %V2F64 = fmul <vscale x 2 x double> poison, poison
+ %V4F64 = fmul <vscale x 4 x double> poison, poison
ret void
}
define void @fdiv() {
; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8F32 = fdiv <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4F64 = fdiv <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <vscale x 16 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8F32 = fdiv <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4F64 = fdiv <vscale x 4 x double> poison, poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = fdiv <vscale x 4 x half> undef, undef
- %V8F16 = fdiv <vscale x 8 x half> undef, undef
- %V16F16 = fdiv <vscale x 16 x half> undef, undef
+ %V4F16 = fdiv <vscale x 4 x half> poison, poison
+ %V8F16 = fdiv <vscale x 8 x half> poison, poison
+ %V16F16 = fdiv <vscale x 16 x half> poison, poison
- %V2F32 = fdiv <vscale x 2 x float> undef, undef
- %V4F32 = fdiv <vscale x 4 x float> undef, undef
- %V8F32 = fdiv <vscale x 8 x float> undef, undef
+ %V2F32 = fdiv <vscale x 2 x float> poison, poison
+ %V4F32 = fdiv <vscale x 4 x float> poison, poison
+ %V8F32 = fdiv <vscale x 8 x float> poison, poison
- %V2F64 = fdiv <vscale x 2 x double> undef, undef
- %V4F64 = fdiv <vscale x 4 x double> undef, undef
+ %V2F64 = fdiv <vscale x 2 x double> poison, poison
+ %V4F64 = fdiv <vscale x 4 x double> poison, poison
ret void
}
define void @frem() {
; CHECK-LABEL: 'frem'
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = frem <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <vscale x 4 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <vscale x 8 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <vscale x 16 x half> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = frem <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <vscale x 4 x double> poison, poison
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = frem <vscale x 4 x half> undef, undef
- %V8F16 = frem <vscale x 8 x half> undef, undef
- %V16F16 = frem <vscale x 16 x half> undef, undef
+ %V4F16 = frem <vscale x 4 x half> poison, poison
+ %V8F16 = frem <vscale x 8 x half> poison, poison
+ %V16F16 = frem <vscale x 16 x half> poison, poison
- %V2F32 = frem <vscale x 2 x float> undef, undef
- %V4F32 = frem <vscale x 4 x float> undef, undef
- %V8F32 = frem <vscale x 8 x float> undef, undef
+ %V2F32 = frem <vscale x 2 x float> poison, poison
+ %V4F32 = frem <vscale x 4 x float> poison, poison
+ %V8F32 = frem <vscale x 8 x float> poison, poison
- %V2F64 = frem <vscale x 2 x double> undef, undef
- %V4F64 = frem <vscale x 4 x double> undef, undef
+ %V2F64 = frem <vscale x 2 x double> poison, poison
+ %V4F64 = frem <vscale x 4 x double> poison, poison
ret void
}
define void @fma() {
; CHECK-LABEL: 'fma'
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
- %V8F16 = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
- %V16F16 = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ %V8F16 = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ %V16F16 = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
- %V2F32 = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
- %V4F32 = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
- %V8F32 = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ %V2F32 = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ %V4F32 = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ %V8F32 = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
- %V2F64 = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
- %V4F64 = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ %V2F64 = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ %V4F64 = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
ret void
}
define void @fmuladd() {
; CHECK-LABEL: 'fmuladd'
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
- %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
- %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.v8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.v16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
- %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
- %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
- %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.v2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.v4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.v8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
- %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
- %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.v2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.v4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
ret void
}
diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
index 4346507ba8f9..181a4494b036 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
@@ -210,7 +210,7 @@ define void @t3(i64 %n, i64 %m, i64 %lb, ptr %a) {
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: %2 = load i32, ptr %arrayidx6, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
-; CHECK-NEXT: da analyze - consistent anti [1 -2]!
+; CHECK-NEXT: da analyze - anti [1 *]!
; CHECK-NEXT: Src: store i32 %2, ptr %arrayidx8, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
; CHECK-NEXT: da analyze - none!
;
diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
index 44bd9b772791..71b93826ac26 100644
--- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ALL
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa -da-enable-dependence-test=strong-siv 2>&1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STRONG-SIV
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
@@ -423,19 +425,33 @@ for.end: ; preds = %for.body
;; *B++ = A[i + 2*n];
define void @strong9(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
-; CHECK-LABEL: 'strong9'
-; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
-; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
-; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT: da analyze - confused!
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT: da analyze - confused!
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-ALL-LABEL: 'strong9'
+; CHECK-ALL-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-ALL-NEXT: da analyze - confused!
+; CHECK-ALL-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-ALL-NEXT: da analyze - confused!
+; CHECK-ALL-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-STRONG-SIV-LABEL: 'strong9'
+; CHECK-STRONG-SIV-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - none!
+; CHECK-STRONG-SIV-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - flow [*|<]!
+; CHECK-STRONG-SIV-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - confused!
+; CHECK-STRONG-SIV-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - none!
+; CHECK-STRONG-SIV-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - confused!
+; CHECK-STRONG-SIV-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - none!
;
entry:
%cmp1 = icmp eq i64 %n, 0
@@ -512,3 +528,45 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %for.body
ret void
}
+
+
+;; for (long unsigned i = 0; i < 9223372036854775806; i++)
+;; for (long unsigned j = 0; j < 2147483640; j++)
+;; if (i < 3000000000)
+;; A[i] = 0;
+;
+; FIXME: DependenceAnalysis fails to detect the dependency between A[i] and
+; itself, and the issue is not caused by the Strong SIV.
+define void @strong11(ptr %A) nounwind uwtable ssp {
+; CHECK-ALL-LABEL: 'strong11'
+; CHECK-ALL-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-STRONG-SIV-LABEL: 'strong11'
+; CHECK-STRONG-SIV-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [0 S]!
+;
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
+ %i.017 = phi i64 [ 0, %entry ], [ %inc8, %for.cond.cleanup3 ]
+ %cmp5 = icmp samesign ult i64 %i.017, 3000000000
+ %arrayidx = getelementptr inbounds nuw i32, ptr %A, i64 %i.017
+ br i1 %cmp5, label %for.body4.us, label %for.cond.cleanup3
+
+for.body4.us: ; preds = %for.cond1.preheader, %for.body4.us
+ %j.016.us = phi i64 [ %inc.us, %for.body4.us ], [ 0, %for.cond1.preheader ]
+ store i32 0, ptr %arrayidx, align 4
+ %inc.us = add nuw nsw i64 %j.016.us, 1
+ %exitcond.not = icmp eq i64 %inc.us, 2147483640
+ br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4.us
+
+for.cond.cleanup: ; preds = %for.cond.cleanup3
+ ret void
+
+for.cond.cleanup3: ; preds = %for.body4.us, %for.cond1.preheader
+ %inc8 = add nuw nsw i64 %i.017, 1
+ %exitcond19.not = icmp eq i64 %inc8, 9223372036854775806
+ br i1 %exitcond19.not, label %for.cond.cleanup, label %for.cond1.preheader
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll b/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll
index 7411dc9f5c05..df42c757a3b6 100644
--- a/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll
@@ -298,7 +298,8 @@ exit:
}
; The value of step reccurence is not invariant with respect to the outer most
-; loop (the i-loop).
+; loop (the i-loop). It is theoretically multivariate monotonic by definition,
+; but we cannot handle non-affine addrec for now.
;
; offset_i = 0;
; for (int i = 0; i < 100; i++) {
@@ -312,7 +313,8 @@ define void @step_is_variant(ptr %a) {
; CHECK-NEXT: Monotonicity check:
; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1
; CHECK-NEXT: Expr: {%offset.i,+,1}<nuw><nsw><%loop.j>
-; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic
+; CHECK-NEXT: Monotonicity: Unknown
+; CHECK-NEXT: Reason: %offset.i
; CHECK-EMPTY:
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
; CHECK-NEXT: da analyze - confused!
@@ -346,6 +348,56 @@ exit:
ret void
}
+; The value of step reccurence is not invariant with respect to the outer most
+; loop (the i-loop). Actually, `offset_i` is not monotonic.
+;
+; offset_i = 0;
+; for (int i = 0; i < 100; i++) {
+; for (int j = 0; j < 100; j++)
+; a[offset_i + j] = 0;
+; offset_i += (i % 2 == 0) ? -1 : 3;
+; }
+;
+define void @step_is_variant2(ptr %a) {
+; CHECK-LABEL: 'step_is_variant2'
+; CHECK-NEXT: Monotonicity check:
+; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: Expr: {%offset.i,+,1}<nsw><%loop.j>
+; CHECK-NEXT: Monotonicity: Unknown
+; CHECK-NEXT: Reason: %offset.i
+; CHECK-EMPTY:
+; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: da analyze - confused!
+;
+entry:
+ br label %loop.i.header
+
+loop.i.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
+ %offset.i = phi i64 [ 0, %entry ], [ %offset.i.next, %loop.i.latch ]
+ %step.i.0 = phi i64 [ -1, %entry ], [ %step.i.1, %loop.i.latch ]
+ %step.i.1 = phi i64 [ 3, %entry ], [ %step.i.0, %loop.i.latch ]
+ br label %loop.j
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
+ %offset = add nsw i64 %offset.i, %j
+ %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+ store i8 0, ptr %idx
+ %j.inc = add nsw i64 %j, 1
+ %exitcond.j = icmp eq i64 %j.inc, 100
+ br i1 %exitcond.j, label %loop.i.latch, label %loop.j
+
+loop.i.latch:
+ %i.inc = add nsw i64 %i, 1
+ %offset.i.next = add nsw i64 %offset.i, %step.i.0
+ %exitcond.i = icmp eq i64 %i.inc, 100
+ br i1 %exitcond.i, label %exit, label %loop.i.header
+
+exit:
+ ret void
+}
+
; The AddRec doesn't have nsw flag for the j-loop, since the store may not be
; executed.
;
diff --git a/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
index bf0fafcbfd6c..6fd71ac8fe41 100644
--- a/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/strong-siv-overflow.ll
@@ -12,19 +12,24 @@
; A[2*i - 4] = 2;
; }
;
-; FIXME: DependenceAnalysis currently detects no dependency between the two
-; stores, but it does exist. For example, each store will access A[0] when i
-; is 1 and 2 respectively.
-; The root cause is that the product of the BTC and the coefficient
-; ((1LL << 62) - 1 and 2) overflows in a signed sense.
+; FIXME: DependenceAnalysis fails to detect the dependency between the two
+; stores, and the issue is not caused by the Strong SIV.
define void @strongsiv_const_ovfl(ptr %A) {
-; CHECK-LABEL: 'strongsiv_const_ovfl'
-; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
-; CHECK-NEXT: da analyze - none!
-; CHECK-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
-; CHECK-NEXT: da analyze - none!
-; CHECK-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
-; CHECK-NEXT: da analyze - none!
+; CHECK-ALL-LABEL: 'strongsiv_const_ovfl'
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-STRONG-SIV-LABEL: 'strongsiv_const_ovfl'
+; CHECK-STRONG-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
+; CHECK-STRONG-SIV-NEXT: da analyze - none!
+; CHECK-STRONG-SIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [1]!
+; CHECK-STRONG-SIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
+; CHECK-STRONG-SIV-NEXT: da analyze - none!
;
entry:
br label %loop.header
@@ -64,5 +69,4 @@ exit:
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-ALL: {{.*}}
-; CHECK-STRONG-SIV: {{.*}}
+; CHECK: {{.*}}
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index f01422e3b099..e547c3429058 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -30,6 +30,7 @@ llvm_canonicalize_cmake_booleans(
LLVM_INCLUDE_SPIRV_TOOLS_TESTS
LLVM_APPEND_VC_REV
LLVM_HAS_LOGF128
+ LLVM_ENABLE_ONDISK_CAS
)
configure_lit_site_cfg(
@@ -81,6 +82,7 @@ set(LLVM_TEST_DEPENDS
llvm-bcanalyzer
llvm-bitcode-strip
llvm-c-test
+ llvm-cas
llvm-cat
llvm-cfi-verify
llvm-cgdata
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll
new file mode 100644
index 000000000000..8d1abdd5380d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+f8f16mm < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+f8f16mm -global-isel < %s | FileCheck %s
+
+define <8 x half> @fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: fmmla.v8f16.v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmmla v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
+entry:
+ %vfmmla1.i = tail call <8 x half> @llvm.aarch64.neon.fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) #3
+ ret <8 x half> %vfmmla1.i
+}
+
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll
new file mode 100644
index 000000000000..4c3356773268
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+f8f32mm < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+f8f32mm -global-isel < %s | FileCheck %s
+
+define <4 x float> @fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: fmmla.v4f32.v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmmla v0.4s, v1.16b, v2.16b
+; CHECK-NEXT: ret
+entry:
+ %vfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) #3
+ ret <4 x float> %vfmmla1.i
+}
diff --git a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
new file mode 100644
index 000000000000..ecc22703ef58
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
+
+declare dso_local void @g(ptr noundef)
+define dso_local preserve_mostcc void @f(ptr noundef %p) #0 {
+entry:
+ %p.addr = alloca ptr, align 8
+ store ptr %p, ptr %p.addr, align 8
+ %0 = load ptr, ptr %p.addr, align 8
+ call void @g(ptr noundef %0)
+ ret void
+}
+
+attributes #0 = { nounwind uwtable(sync) }
+
+; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
+; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
+; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
+; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
+; CHECK: .seh_endprologue
+
+; CHECK: .seh_startepilogue
+; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
+; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
+; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
+; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
+; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
+; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
+; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
+; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
+; CHECK: .seh_endepilogue
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-fdot.ll b/llvm/test/CodeGen/AArch64/sve2p1-fdot.ll
new file mode 100644
index 000000000000..9dbe096ebdb5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-fdot.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
+; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
+
+target triple = "aarch64-linux-gnu"
+
+define <vscale x 4 x float> @fdot_wide_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; SVE2-LABEL: fdot_wide_nxv4f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: uunpklo z3.s, z1.h
+; SVE2-NEXT: uunpklo z4.s, z2.h
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: uunpkhi z1.s, z1.h
+; SVE2-NEXT: uunpkhi z2.s, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fcvt z4.s, p0/m, z4.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fmul z3.s, z3.s, z4.s
+; SVE2-NEXT: fmul z1.s, z1.s, z2.s
+; SVE2-NEXT: fadd z0.s, z0.s, z3.s
+; SVE2-NEXT: fadd z0.s, z0.s, z1.s
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_nxv4f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: ret
+entry:
+ %a.wide = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
+ %b.wide = fpext <vscale x 8 x half> %b to <vscale x 8 x float>
+ %mult = fmul <vscale x 8 x float> %a.wide, %b.wide
+ %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %mult)
+ ret <vscale x 4 x float> %partial.reduce
+}
+
+define <vscale x 4 x float> @fdot_splat_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a) {
+; SVE2-LABEL: fdot_splat_nxv4f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: uunpklo z2.s, z1.h
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: uunpkhi z1.s, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fadd z0.s, z0.s, z2.s
+; SVE2-NEXT: fadd z0.s, z0.s, z1.s
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_splat_nxv4f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: fmov z2.h, #1.00000000
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: ret
+entry:
+ %a.wide = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
+ %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %a.wide)
+ ret <vscale x 4 x float> %partial.reduce
+}
+
+define <vscale x 8 x half> @partial_reduce_nxv8f16(<vscale x 8 x half> %acc, <vscale x 16 x half> %a) {
+; CHECK-LABEL: partial_reduce_nxv8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd z0.h, z0.h, z1.h
+; CHECK-NEXT: fadd z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <vscale x 8 x half> @llvm.vector.partial.reduce.fadd(<vscale x 8 x half> %acc, <vscale x 16 x half> %a)
+ ret <vscale x 8 x half> %partial.reduce
+}
+
+define <vscale x 4 x float> @partial_reduce_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x float> %a) {
+; CHECK-LABEL: partial_reduce_nxv4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-NEXT: fadd z0.s, z0.s, z2.s
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %a)
+ ret <vscale x 4 x float> %partial.reduce
+}
+
+define <vscale x 2 x double> @partial_reduce_nxv2f64(<vscale x 2 x double> %acc, <vscale x 4 x double> %a) {
+; CHECK-LABEL: partial_reduce_nxv2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd z0.d, z0.d, z1.d
+; CHECK-NEXT: fadd z0.d, z0.d, z2.d
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <vscale x 2 x double> @llvm.vector.partial.reduce.fadd(<vscale x 2 x double> %acc, <vscale x 4 x double> %a)
+ ret <vscale x 2 x double> %partial.reduce
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-fixed-length-fdot.ll b/llvm/test/CodeGen/AArch64/sve2p1-fixed-length-fdot.ll
new file mode 100644
index 000000000000..89216ce2cb72
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-fixed-length-fdot.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
+
+target triple = "aarch64-linux-gnu"
+
+define void @fdot_wide_v8f32(ptr %accptr, ptr %aptr, ptr %bptr) vscale_range(2,0) {
+; SVE2-LABEL: fdot_wide_v8f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s, vl8
+; SVE2-NEXT: mov x8, #8 // =0x8
+; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
+; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
+; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, x8, lsl #1]
+; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, x8, lsl #1]
+; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ld1w { z1.s }, p0/z, [x0]
+; SVE2-NEXT: fmul z2.s, p0/m, z2.s, z3.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; SVE2-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_v8f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: ptrue p0.s, vl8
+; SVE2P1-NEXT: ptrue p1.h, vl16
+; SVE2P1-NEXT: ld1w { z0.s }, p0/z, [x0]
+; SVE2P1-NEXT: ld1h { z1.h }, p1/z, [x1]
+; SVE2P1-NEXT: ld1h { z2.h }, p1/z, [x2]
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2P1-NEXT: ret
+entry:
+ %acc = load <8 x float>, ptr %accptr
+ %a = load <16 x half>, ptr %aptr
+ %b = load <16 x half>, ptr %bptr
+ %a.wide = fpext <16 x half> %a to <16 x float>
+ %b.wide = fpext <16 x half> %b to <16 x float>
+ %mult = fmul <16 x float> %a.wide, %b.wide
+ %partial.reduce = call <8 x float> @llvm.vector.partial.reduce.fadd(<8 x float> %acc, <16 x float> %mult)
+ store <8 x float> %partial.reduce, ptr %accptr
+ ret void
+}
+
+define void @fdot_wide_v16f32(ptr %accptr, ptr %aptr, ptr %bptr) vscale_range(4,0) {
+; SVE2-LABEL: fdot_wide_v16f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s, vl16
+; SVE2-NEXT: mov x8, #16 // =0x10
+; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
+; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
+; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, x8, lsl #1]
+; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, x8, lsl #1]
+; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ld1w { z1.s }, p0/z, [x0]
+; SVE2-NEXT: fmul z2.s, p0/m, z2.s, z3.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; SVE2-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_v16f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: ptrue p0.s, vl16
+; SVE2P1-NEXT: ptrue p1.h, vl32
+; SVE2P1-NEXT: ld1w { z0.s }, p0/z, [x0]
+; SVE2P1-NEXT: ld1h { z1.h }, p1/z, [x1]
+; SVE2P1-NEXT: ld1h { z2.h }, p1/z, [x2]
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2P1-NEXT: ret
+entry:
+ %acc = load <16 x float>, ptr %accptr
+ %a = load <32 x half>, ptr %aptr
+ %b = load <32 x half>, ptr %bptr
+ %a.wide = fpext <32 x half> %a to <32 x float>
+ %b.wide = fpext <32 x half> %b to <32 x float>
+ %mult = fmul <32 x float> %a.wide, %b.wide
+ %partial.reduce = call <16 x float> @llvm.vector.partial.reduce.fadd(<16 x float> %acc, <32 x float> %mult)
+ store <16 x float> %partial.reduce, ptr %accptr
+ ret void
+}
+
+define void @fdot_wide_v32f32(ptr %accptr, ptr %aptr, ptr %bptr) vscale_range(8,0) {
+; SVE2-LABEL: fdot_wide_v32f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s, vl32
+; SVE2-NEXT: mov x8, #32 // =0x20
+; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
+; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
+; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, x8, lsl #1]
+; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, x8, lsl #1]
+; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ld1w { z1.s }, p0/z, [x0]
+; SVE2-NEXT: fmul z2.s, p0/m, z2.s, z3.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; SVE2-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_v32f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: ptrue p0.s, vl32
+; SVE2P1-NEXT: ptrue p1.h, vl64
+; SVE2P1-NEXT: ld1w { z0.s }, p0/z, [x0]
+; SVE2P1-NEXT: ld1h { z1.h }, p1/z, [x1]
+; SVE2P1-NEXT: ld1h { z2.h }, p1/z, [x2]
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2P1-NEXT: ret
+entry:
+ %acc = load <32 x float>, ptr %accptr
+ %a = load <64 x half>, ptr %aptr
+ %b = load <64 x half>, ptr %bptr
+ %a.wide = fpext <64 x half> %a to <64 x float>
+ %b.wide = fpext <64 x half> %b to <64 x float>
+ %mult = fmul <64 x float> %a.wide, %b.wide
+ %partial.reduce = call <32 x float> @llvm.vector.partial.reduce.fadd(<32 x float> %acc, <64 x float> %mult)
+ store <32 x float> %partial.reduce, ptr %accptr
+ ret void
+}
+
+define void @fdot_wide_v64f32(ptr %accptr, ptr %aptr, ptr %bptr) vscale_range(16,0) {
+; SVE2-LABEL: fdot_wide_v64f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s, vl64
+; SVE2-NEXT: mov x8, #64 // =0x40
+; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
+; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
+; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, x8, lsl #1]
+; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, x8, lsl #1]
+; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ld1w { z1.s }, p0/z, [x0]
+; SVE2-NEXT: fmul z2.s, p0/m, z2.s, z3.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; SVE2-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_v64f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: ptrue p0.s, vl64
+; SVE2P1-NEXT: ptrue p1.h, vl128
+; SVE2P1-NEXT: ld1w { z0.s }, p0/z, [x0]
+; SVE2P1-NEXT: ld1h { z1.h }, p1/z, [x1]
+; SVE2P1-NEXT: ld1h { z2.h }, p1/z, [x2]
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: st1w { z0.s }, p0, [x0]
+; SVE2P1-NEXT: ret
+entry:
+ %acc = load <64 x float>, ptr %accptr
+ %a = load <128 x half>, ptr %aptr
+ %b = load <128 x half>, ptr %bptr
+ %a.wide = fpext <128 x half> %a to <128 x float>
+ %b.wide = fpext <128 x half> %b to <128 x float>
+ %mult = fmul <128 x float> %a.wide, %b.wide
+ %partial.reduce = call <64 x float> @llvm.vector.partial.reduce.fadd(<64 x float> %acc, <128 x float> %mult)
+ store <64 x float> %partial.reduce, ptr %accptr
+ ret void
+}
+
+define <4 x float> @fixed_fdot_wide(<4 x float> %acc, <8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: fixed_fdot_wide:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-NEXT: fcvtl v4.4s, v2.4h
+; CHECK-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NEXT: fcvtl2 v2.4s, v2.8h
+; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %a.wide = fpext <8 x half> %a to <8 x float>
+ %b.wide = fpext <8 x half> %b to <8 x float>
+ %mult = fmul <8 x float> %a.wide, %b.wide
+ %partial.reduce = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %mult)
+ ret <4 x float> %partial.reduce
+}
+
+define <8 x half> @partial_reduce_half(<8 x half> %acc, <16 x half> %a) {
+; CHECK-LABEL: partial_reduce_half:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fadd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <8 x half> @llvm.vector.partial.reduce.fadd(<8 x half> %acc, <16 x half> %a)
+ ret <8 x half> %partial.reduce
+}
+
+define <4 x float> @partial_reduce_float(<4 x float> %acc, <8 x float> %a) {
+; CHECK-LABEL: partial_reduce_float:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %a)
+ ret <4 x float> %partial.reduce
+}
+
+define <2 x double> @partial_reduce_double(<2 x double> %acc, <4 x double> %a) {
+; CHECK-LABEL: partial_reduce_double:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fadd v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
+entry:
+ %partial.reduce = call <2 x double> @llvm.vector.partial.reduce.fadd(<2 x double> %acc, <4 x double> %a)
+ ret <2 x double> %partial.reduce
+}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 51df8c34cc55..54b1554ae5d0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7772,7 +7772,6 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
; GFX6-NEXT: s_ashr_i32 s8, s1, 31
@@ -7782,8 +7781,8 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_xor_b64 s[10:11], s[0:1], s[8:9]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s10
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s11
-; GFX6-NEXT: s_sub_u32 s12, 0, s10
-; GFX6-NEXT: s_subb_u32 s13, 0, s11
+; GFX6-NEXT: s_sub_u32 s0, 0, s10
+; GFX6-NEXT: s_subb_u32 s1, 0, s11
; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -7792,128 +7791,121 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s1, s12, s14
-; GFX6-NEXT: v_readfirstlane_b32 s17, v2
-; GFX6-NEXT: s_mul_i32 s15, s13, s0
-; GFX6-NEXT: s_mul_i32 s16, s12, s0
-; GFX6-NEXT: s_add_i32 s1, s17, s1
-; GFX6-NEXT: v_mul_hi_u32 v3, v0, s16
-; GFX6-NEXT: s_add_i32 s1, s1, s15
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1
-; GFX6-NEXT: v_mul_hi_u32 v4, v1, s16
-; GFX6-NEXT: v_readfirstlane_b32 s15, v3
-; GFX6-NEXT: s_mul_i32 s17, s0, s1
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s1
-; GFX6-NEXT: s_add_u32 s15, s15, s17
-; GFX6-NEXT: v_readfirstlane_b32 s17, v0
-; GFX6-NEXT: s_addc_u32 s17, 0, s17
-; GFX6-NEXT: s_mul_i32 s16, s14, s16
-; GFX6-NEXT: v_readfirstlane_b32 s18, v4
-; GFX6-NEXT: s_add_u32 s15, s15, s16
-; GFX6-NEXT: s_addc_u32 s15, s17, s18
-; GFX6-NEXT: v_readfirstlane_b32 s16, v1
-; GFX6-NEXT: s_addc_u32 s16, s16, 0
-; GFX6-NEXT: s_mul_i32 s1, s14, s1
-; GFX6-NEXT: s_add_u32 s1, s15, s1
-; GFX6-NEXT: s_addc_u32 s15, 0, s16
-; GFX6-NEXT: s_add_u32 s16, s0, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s16
-; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0
-; GFX6-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_addc_u32 s14, s14, s15
-; GFX6-NEXT: s_mul_i32 s0, s12, s14
-; GFX6-NEXT: v_readfirstlane_b32 s1, v0
-; GFX6-NEXT: s_add_i32 s0, s1, s0
-; GFX6-NEXT: s_mul_i32 s13, s13, s16
-; GFX6-NEXT: s_mul_i32 s1, s12, s16
-; GFX6-NEXT: s_add_i32 s0, s0, s13
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mul_hi_u32 v3, s14, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s16, v2
-; GFX6-NEXT: v_mul_hi_u32 v1, s14, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s16, v0
-; GFX6-NEXT: s_mul_i32 s13, s16, s0
-; GFX6-NEXT: v_readfirstlane_b32 s17, v2
-; GFX6-NEXT: s_add_u32 s13, s17, s13
-; GFX6-NEXT: v_readfirstlane_b32 s15, v0
-; GFX6-NEXT: s_mul_i32 s1, s14, s1
-; GFX6-NEXT: s_addc_u32 s15, 0, s15
-; GFX6-NEXT: v_readfirstlane_b32 s12, v3
-; GFX6-NEXT: s_add_u32 s1, s13, s1
-; GFX6-NEXT: s_addc_u32 s1, s15, s12
+; GFX6-NEXT: v_mul_hi_u32 v2, s0, v0
; GFX6-NEXT: v_readfirstlane_b32 s12, v1
-; GFX6-NEXT: s_addc_u32 s12, s12, 0
-; GFX6-NEXT: s_mul_i32 s0, s14, s0
-; GFX6-NEXT: s_add_u32 s0, s1, s0
-; GFX6-NEXT: s_addc_u32 s12, 0, s12
-; GFX6-NEXT: s_add_u32 s15, s16, s0
-; GFX6-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_addc_u32 s14, s14, s12
+; GFX6-NEXT: v_readfirstlane_b32 s2, v0
+; GFX6-NEXT: s_mul_i32 s13, s0, s12
+; GFX6-NEXT: v_readfirstlane_b32 s16, v2
+; GFX6-NEXT: s_mul_i32 s14, s1, s2
+; GFX6-NEXT: s_mul_i32 s15, s0, s2
+; GFX6-NEXT: s_add_i32 s13, s16, s13
+; GFX6-NEXT: v_mul_hi_u32 v3, v0, s15
+; GFX6-NEXT: s_add_i32 s13, s13, s14
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s13
+; GFX6-NEXT: v_mul_hi_u32 v4, v1, s15
+; GFX6-NEXT: v_readfirstlane_b32 s14, v3
+; GFX6-NEXT: s_mul_i32 s16, s2, s13
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s13
+; GFX6-NEXT: s_add_u32 s14, s14, s16
+; GFX6-NEXT: v_readfirstlane_b32 s16, v0
+; GFX6-NEXT: s_mul_i32 s15, s12, s15
+; GFX6-NEXT: s_addc_u32 s16, 0, s16
+; GFX6-NEXT: v_readfirstlane_b32 s17, v4
+; GFX6-NEXT: s_add_u32 s14, s14, s15
+; GFX6-NEXT: s_addc_u32 s14, s16, s17
+; GFX6-NEXT: v_readfirstlane_b32 s15, v1
+; GFX6-NEXT: s_addc_u32 s15, s15, 0
+; GFX6-NEXT: s_mul_i32 s13, s12, s13
+; GFX6-NEXT: s_add_u32 s13, s14, s13
+; GFX6-NEXT: s_addc_u32 s14, 0, s15
+; GFX6-NEXT: s_add_u32 s13, s2, s13
+; GFX6-NEXT: v_mov_b32_e32 v0, s13
+; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
+; GFX6-NEXT: s_addc_u32 s12, s12, s14
+; GFX6-NEXT: s_mul_i32 s14, s0, s12
+; GFX6-NEXT: s_mul_i32 s1, s1, s13
+; GFX6-NEXT: v_readfirstlane_b32 s15, v0
+; GFX6-NEXT: s_add_i32 s14, s15, s14
+; GFX6-NEXT: s_mul_i32 s0, s0, s13
+; GFX6-NEXT: s_add_i32 s1, s14, s1
+; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: v_mov_b32_e32 v0, s1
+; GFX6-NEXT: v_mul_hi_u32 v3, s12, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s13, v2
+; GFX6-NEXT: v_mul_hi_u32 v1, s12, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, s13, v0
+; GFX6-NEXT: s_mul_i32 s15, s13, s1
+; GFX6-NEXT: v_readfirstlane_b32 s17, v2
+; GFX6-NEXT: s_add_u32 s15, s17, s15
+; GFX6-NEXT: v_readfirstlane_b32 s16, v0
+; GFX6-NEXT: s_mul_i32 s0, s12, s0
+; GFX6-NEXT: s_addc_u32 s16, 0, s16
+; GFX6-NEXT: v_readfirstlane_b32 s14, v3
+; GFX6-NEXT: s_add_u32 s0, s15, s0
+; GFX6-NEXT: s_addc_u32 s0, s16, s14
+; GFX6-NEXT: v_readfirstlane_b32 s14, v1
+; GFX6-NEXT: s_addc_u32 s14, s14, 0
+; GFX6-NEXT: s_mul_i32 s1, s12, s1
+; GFX6-NEXT: s_add_u32 s0, s0, s1
+; GFX6-NEXT: s_addc_u32 s1, 0, s14
+; GFX6-NEXT: s_add_u32 s14, s13, s0
+; GFX6-NEXT: s_addc_u32 s15, s12, s1
; GFX6-NEXT: s_ashr_i32 s12, s7, 31
; GFX6-NEXT: s_add_u32 s0, s6, s12
; GFX6-NEXT: s_mov_b32 s13, s12
; GFX6-NEXT: s_addc_u32 s1, s7, s12
; GFX6-NEXT: s_xor_b64 s[6:7], s[0:1], s[12:13]
-; GFX6-NEXT: v_mov_b32_e32 v0, s14
+; GFX6-NEXT: v_mov_b32_e32 v0, s15
; GFX6-NEXT: v_mul_hi_u32 v1, s6, v0
-; GFX6-NEXT: v_mov_b32_e32 v2, s15
+; GFX6-NEXT: v_mov_b32_e32 v2, s14
; GFX6-NEXT: v_mul_hi_u32 v3, s6, v2
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: v_readfirstlane_b32 s4, v1
; GFX6-NEXT: v_mul_hi_u32 v1, s7, v2
-; GFX6-NEXT: s_mul_i32 s1, s6, s14
+; GFX6-NEXT: s_mul_i32 s1, s6, s15
; GFX6-NEXT: v_readfirstlane_b32 s16, v3
; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0
; GFX6-NEXT: s_add_u32 s1, s16, s1
; GFX6-NEXT: s_addc_u32 s4, 0, s4
-; GFX6-NEXT: s_mul_i32 s15, s7, s15
+; GFX6-NEXT: s_mul_i32 s14, s7, s14
; GFX6-NEXT: v_readfirstlane_b32 s16, v1
-; GFX6-NEXT: s_add_u32 s1, s1, s15
+; GFX6-NEXT: s_add_u32 s1, s1, s14
; GFX6-NEXT: s_addc_u32 s1, s4, s16
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: s_addc_u32 s4, s4, 0
-; GFX6-NEXT: s_mul_i32 s14, s7, s14
-; GFX6-NEXT: s_add_u32 s16, s1, s14
-; GFX6-NEXT: v_mov_b32_e32 v0, s16
+; GFX6-NEXT: s_mul_i32 s14, s7, s15
+; GFX6-NEXT: s_add_u32 s14, s1, s14
+; GFX6-NEXT: v_mov_b32_e32 v0, s14
; GFX6-NEXT: v_mul_hi_u32 v0, s10, v0
-; GFX6-NEXT: s_addc_u32 s17, 0, s4
+; GFX6-NEXT: s_addc_u32 s15, 0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
-; GFX6-NEXT: s_mul_i32 s4, s10, s17
+; GFX6-NEXT: s_mul_i32 s4, s10, s15
; GFX6-NEXT: v_readfirstlane_b32 s5, v0
; GFX6-NEXT: s_add_i32 s4, s5, s4
-; GFX6-NEXT: s_mul_i32 s5, s11, s16
-; GFX6-NEXT: s_add_i32 s18, s4, s5
-; GFX6-NEXT: s_sub_i32 s14, s7, s18
-; GFX6-NEXT: s_mul_i32 s4, s10, s16
+; GFX6-NEXT: s_mul_i32 s5, s11, s14
+; GFX6-NEXT: s_add_i32 s16, s4, s5
+; GFX6-NEXT: s_sub_i32 s17, s7, s16
+; GFX6-NEXT: s_mul_i32 s4, s10, s14
; GFX6-NEXT: s_sub_u32 s6, s6, s4
; GFX6-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX6-NEXT: s_or_b32 s15, s4, s5
-; GFX6-NEXT: s_subb_u32 s19, s14, s11
-; GFX6-NEXT: s_sub_u32 s20, s6, s10
-; GFX6-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GFX6-NEXT: s_or_b32 s14, s14, s15
-; GFX6-NEXT: s_subb_u32 s14, s19, 0
-; GFX6-NEXT: s_cmp_ge_u32 s14, s11
-; GFX6-NEXT: s_cselect_b32 s15, -1, 0
-; GFX6-NEXT: s_cmp_ge_u32 s20, s10
+; GFX6-NEXT: s_subb_u32 s17, s17, s11
+; GFX6-NEXT: s_sub_u32 s18, s6, s10
+; GFX6-NEXT: s_subb_u32 s17, s17, 0
+; GFX6-NEXT: s_cmp_ge_u32 s17, s11
; GFX6-NEXT: s_cselect_b32 s19, -1, 0
-; GFX6-NEXT: s_cmp_eq_u32 s14, s11
-; GFX6-NEXT: s_cselect_b32 s14, s19, s15
-; GFX6-NEXT: s_add_u32 s15, s16, 1
-; GFX6-NEXT: s_addc_u32 s19, s17, 0
-; GFX6-NEXT: s_add_u32 s20, s16, 2
-; GFX6-NEXT: s_addc_u32 s21, s17, 0
-; GFX6-NEXT: s_cmp_lg_u32 s14, 0
-; GFX6-NEXT: s_cselect_b32 s14, s20, s15
-; GFX6-NEXT: s_cselect_b32 s15, s21, s19
+; GFX6-NEXT: s_cmp_ge_u32 s18, s10
+; GFX6-NEXT: s_cselect_b32 s18, -1, 0
+; GFX6-NEXT: s_cmp_eq_u32 s17, s11
+; GFX6-NEXT: s_cselect_b32 s17, s18, s19
+; GFX6-NEXT: s_add_u32 s18, s14, 1
+; GFX6-NEXT: s_addc_u32 s19, s15, 0
+; GFX6-NEXT: s_add_u32 s20, s14, 2
+; GFX6-NEXT: s_addc_u32 s21, s15, 0
+; GFX6-NEXT: s_cmp_lg_u32 s17, 0
+; GFX6-NEXT: s_cselect_b32 s17, s20, s18
+; GFX6-NEXT: s_cselect_b32 s18, s21, s19
; GFX6-NEXT: s_or_b32 s4, s4, s5
-; GFX6-NEXT: s_subb_u32 s4, s7, s18
+; GFX6-NEXT: s_subb_u32 s4, s7, s16
; GFX6-NEXT: s_cmp_ge_u32 s4, s11
; GFX6-NEXT: s_cselect_b32 s5, -1, 0
; GFX6-NEXT: s_cmp_ge_u32 s6, s10
@@ -7921,13 +7913,14 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_cmp_eq_u32 s4, s11
; GFX6-NEXT: s_cselect_b32 s4, s6, s5
; GFX6-NEXT: s_cmp_lg_u32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s5, s15, s17
-; GFX6-NEXT: s_cselect_b32 s4, s14, s16
+; GFX6-NEXT: s_cselect_b32 s5, s18, s15
+; GFX6-NEXT: s_cselect_b32 s4, s17, s14
; GFX6-NEXT: s_xor_b64 s[6:7], s[12:13], s[8:9]
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
; GFX6-NEXT: s_sub_u32 s4, s4, s6
; GFX6-NEXT: s_subb_u32 s5, s5, s7
; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
@@ -8278,8 +8271,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[2:3]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s7
-; GFX6-NEXT: s_sub_u32 s14, 0, s6
-; GFX6-NEXT: s_subb_u32 s15, 0, s7
+; GFX6-NEXT: s_sub_u32 s12, 0, s6
+; GFX6-NEXT: s_subb_u32 s13, 0, s7
; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -8288,69 +8281,65 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s14, v0
-; GFX6-NEXT: v_readfirstlane_b32 s16, v1
-; GFX6-NEXT: v_readfirstlane_b32 s12, v0
-; GFX6-NEXT: s_mul_i32 s13, s14, s16
+; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0
+; GFX6-NEXT: v_readfirstlane_b32 s14, v1
+; GFX6-NEXT: v_readfirstlane_b32 s15, v0
+; GFX6-NEXT: s_mul_i32 s16, s12, s14
; GFX6-NEXT: v_readfirstlane_b32 s19, v2
-; GFX6-NEXT: s_mul_i32 s17, s15, s12
-; GFX6-NEXT: s_mul_i32 s18, s14, s12
-; GFX6-NEXT: s_add_i32 s13, s19, s13
+; GFX6-NEXT: s_mul_i32 s17, s13, s15
+; GFX6-NEXT: s_mul_i32 s18, s12, s15
+; GFX6-NEXT: s_add_i32 s16, s19, s16
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s18
-; GFX6-NEXT: s_add_i32 s13, s13, s17
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s13
+; GFX6-NEXT: s_add_i32 s16, s16, s17
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s16
; GFX6-NEXT: v_mul_hi_u32 v4, v1, s18
; GFX6-NEXT: v_readfirstlane_b32 s17, v3
-; GFX6-NEXT: s_mul_i32 s20, s12, s13
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s13
+; GFX6-NEXT: s_mul_i32 s20, s15, s16
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s16
; GFX6-NEXT: s_add_u32 s17, s17, s20
; GFX6-NEXT: v_readfirstlane_b32 s20, v0
-; GFX6-NEXT: s_mul_i32 s18, s16, s18
+; GFX6-NEXT: s_mul_i32 s18, s14, s18
; GFX6-NEXT: s_addc_u32 s20, 0, s20
; GFX6-NEXT: v_readfirstlane_b32 s19, v4
; GFX6-NEXT: s_add_u32 s17, s17, s18
; GFX6-NEXT: s_addc_u32 s17, s20, s19
; GFX6-NEXT: v_readfirstlane_b32 s18, v1
; GFX6-NEXT: s_addc_u32 s18, s18, 0
-; GFX6-NEXT: s_mul_i32 s13, s16, s13
-; GFX6-NEXT: s_add_u32 s13, s17, s13
+; GFX6-NEXT: s_mul_i32 s16, s14, s16
+; GFX6-NEXT: s_add_u32 s16, s17, s16
; GFX6-NEXT: s_addc_u32 s17, 0, s18
-; GFX6-NEXT: s_add_u32 s18, s12, s13
-; GFX6-NEXT: v_mov_b32_e32 v0, s18
-; GFX6-NEXT: v_mul_hi_u32 v0, s14, v0
-; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_addc_u32 s16, s16, s17
-; GFX6-NEXT: s_mul_i32 s12, s14, s16
-; GFX6-NEXT: v_readfirstlane_b32 s13, v0
-; GFX6-NEXT: s_add_i32 s12, s13, s12
-; GFX6-NEXT: s_mul_i32 s15, s15, s18
-; GFX6-NEXT: s_mul_i32 s13, s14, s18
-; GFX6-NEXT: s_add_i32 s12, s12, s15
-; GFX6-NEXT: v_mov_b32_e32 v2, s13
-; GFX6-NEXT: v_mov_b32_e32 v0, s12
-; GFX6-NEXT: v_mul_hi_u32 v3, s16, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s18, v2
-; GFX6-NEXT: v_mul_hi_u32 v1, s16, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s18, v0
-; GFX6-NEXT: s_mul_i32 s15, s18, s12
-; GFX6-NEXT: v_readfirstlane_b32 s19, v2
-; GFX6-NEXT: s_add_u32 s15, s19, s15
+; GFX6-NEXT: s_add_u32 s15, s15, s16
+; GFX6-NEXT: v_mov_b32_e32 v0, s15
+; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0
+; GFX6-NEXT: s_addc_u32 s14, s14, s17
+; GFX6-NEXT: s_mul_i32 s16, s12, s14
+; GFX6-NEXT: s_mul_i32 s13, s13, s15
; GFX6-NEXT: v_readfirstlane_b32 s17, v0
-; GFX6-NEXT: s_mul_i32 s13, s16, s13
-; GFX6-NEXT: s_addc_u32 s17, 0, s17
-; GFX6-NEXT: v_readfirstlane_b32 s14, v3
-; GFX6-NEXT: s_add_u32 s13, s15, s13
-; GFX6-NEXT: s_addc_u32 s13, s17, s14
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: s_addc_u32 s14, s14, 0
-; GFX6-NEXT: s_mul_i32 s12, s16, s12
-; GFX6-NEXT: s_add_u32 s12, s13, s12
-; GFX6-NEXT: s_addc_u32 s14, 0, s14
-; GFX6-NEXT: s_add_u32 s15, s18, s12
-; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_addc_u32 s14, s16, s14
+; GFX6-NEXT: s_add_i32 s16, s17, s16
+; GFX6-NEXT: s_mul_i32 s12, s12, s15
+; GFX6-NEXT: s_add_i32 s13, s16, s13
+; GFX6-NEXT: v_mov_b32_e32 v2, s12
+; GFX6-NEXT: v_mov_b32_e32 v0, s13
+; GFX6-NEXT: v_mul_hi_u32 v3, s14, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s15, v2
+; GFX6-NEXT: v_mul_hi_u32 v1, s14, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, s15, v0
+; GFX6-NEXT: s_mul_i32 s17, s15, s13
+; GFX6-NEXT: v_readfirstlane_b32 s19, v2
+; GFX6-NEXT: s_add_u32 s17, s19, s17
+; GFX6-NEXT: v_readfirstlane_b32 s18, v0
+; GFX6-NEXT: s_mul_i32 s12, s14, s12
+; GFX6-NEXT: s_addc_u32 s18, 0, s18
+; GFX6-NEXT: v_readfirstlane_b32 s16, v3
+; GFX6-NEXT: s_add_u32 s12, s17, s12
+; GFX6-NEXT: s_addc_u32 s12, s18, s16
+; GFX6-NEXT: v_readfirstlane_b32 s16, v1
+; GFX6-NEXT: s_addc_u32 s16, s16, 0
+; GFX6-NEXT: s_mul_i32 s13, s14, s13
+; GFX6-NEXT: s_add_u32 s12, s12, s13
+; GFX6-NEXT: s_addc_u32 s13, 0, s16
+; GFX6-NEXT: s_add_u32 s15, s15, s12
+; GFX6-NEXT: s_addc_u32 s14, s14, s13
; GFX6-NEXT: s_ashr_i32 s12, s9, 31
; GFX6-NEXT: s_add_u32 s8, s8, s12
; GFX6-NEXT: s_mov_b32 s13, s12
@@ -8374,40 +8363,37 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: v_readfirstlane_b32 s16, v0
; GFX6-NEXT: s_addc_u32 s16, s16, 0
; GFX6-NEXT: s_mul_i32 s14, s9, s14
-; GFX6-NEXT: s_add_u32 s18, s15, s14
-; GFX6-NEXT: v_mov_b32_e32 v0, s18
+; GFX6-NEXT: s_add_u32 s17, s15, s14
+; GFX6-NEXT: v_mov_b32_e32 v0, s17
; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0
-; GFX6-NEXT: s_addc_u32 s19, 0, s16
-; GFX6-NEXT: s_mul_i32 s14, s6, s19
+; GFX6-NEXT: s_addc_u32 s16, 0, s16
+; GFX6-NEXT: s_mul_i32 s14, s6, s16
; GFX6-NEXT: v_readfirstlane_b32 s15, v0
; GFX6-NEXT: s_add_i32 s14, s15, s14
-; GFX6-NEXT: s_mul_i32 s15, s7, s18
-; GFX6-NEXT: s_add_i32 s20, s14, s15
-; GFX6-NEXT: s_sub_i32 s16, s9, s20
-; GFX6-NEXT: s_mul_i32 s14, s6, s18
+; GFX6-NEXT: s_mul_i32 s15, s7, s17
+; GFX6-NEXT: s_add_i32 s18, s14, s15
+; GFX6-NEXT: s_sub_i32 s19, s9, s18
+; GFX6-NEXT: s_mul_i32 s14, s6, s17
; GFX6-NEXT: s_sub_u32 s8, s8, s14
; GFX6-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GFX6-NEXT: s_or_b32 s17, s14, s15
-; GFX6-NEXT: s_subb_u32 s21, s16, s7
-; GFX6-NEXT: s_sub_u32 s22, s8, s6
-; GFX6-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GFX6-NEXT: s_or_b32 s16, s16, s17
-; GFX6-NEXT: s_subb_u32 s16, s21, 0
-; GFX6-NEXT: s_cmp_ge_u32 s16, s7
-; GFX6-NEXT: s_cselect_b32 s17, -1, 0
-; GFX6-NEXT: s_cmp_ge_u32 s22, s6
+; GFX6-NEXT: s_subb_u32 s19, s19, s7
+; GFX6-NEXT: s_sub_u32 s20, s8, s6
+; GFX6-NEXT: s_subb_u32 s19, s19, 0
+; GFX6-NEXT: s_cmp_ge_u32 s19, s7
; GFX6-NEXT: s_cselect_b32 s21, -1, 0
-; GFX6-NEXT: s_cmp_eq_u32 s16, s7
-; GFX6-NEXT: s_cselect_b32 s16, s21, s17
-; GFX6-NEXT: s_add_u32 s17, s18, 1
-; GFX6-NEXT: s_addc_u32 s21, s19, 0
-; GFX6-NEXT: s_add_u32 s22, s18, 2
-; GFX6-NEXT: s_addc_u32 s23, s19, 0
-; GFX6-NEXT: s_cmp_lg_u32 s16, 0
-; GFX6-NEXT: s_cselect_b32 s16, s22, s17
-; GFX6-NEXT: s_cselect_b32 s17, s23, s21
+; GFX6-NEXT: s_cmp_ge_u32 s20, s6
+; GFX6-NEXT: s_cselect_b32 s20, -1, 0
+; GFX6-NEXT: s_cmp_eq_u32 s19, s7
+; GFX6-NEXT: s_cselect_b32 s19, s20, s21
+; GFX6-NEXT: s_add_u32 s20, s17, 1
+; GFX6-NEXT: s_addc_u32 s21, s16, 0
+; GFX6-NEXT: s_add_u32 s22, s17, 2
+; GFX6-NEXT: s_addc_u32 s23, s16, 0
+; GFX6-NEXT: s_cmp_lg_u32 s19, 0
+; GFX6-NEXT: s_cselect_b32 s19, s22, s20
+; GFX6-NEXT: s_cselect_b32 s20, s23, s21
; GFX6-NEXT: s_or_b32 s14, s14, s15
-; GFX6-NEXT: s_subb_u32 s9, s9, s20
+; GFX6-NEXT: s_subb_u32 s9, s9, s18
; GFX6-NEXT: s_cmp_ge_u32 s9, s7
; GFX6-NEXT: s_cselect_b32 s14, -1, 0
; GFX6-NEXT: s_cmp_ge_u32 s8, s6
@@ -8415,12 +8401,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_cmp_eq_u32 s9, s7
; GFX6-NEXT: s_cselect_b32 s6, s6, s14
; GFX6-NEXT: s_cmp_lg_u32 s6, 0
-; GFX6-NEXT: s_cselect_b32 s7, s17, s19
-; GFX6-NEXT: s_cselect_b32 s6, s16, s18
+; GFX6-NEXT: s_cselect_b32 s7, s20, s16
+; GFX6-NEXT: s_cselect_b32 s6, s19, s17
; GFX6-NEXT: s_xor_b64 s[2:3], s[12:13], s[2:3]
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[2:3]
-; GFX6-NEXT: s_sub_u32 s16, s6, s2
-; GFX6-NEXT: s_subb_u32 s17, s7, s3
+; GFX6-NEXT: s_sub_u32 s14, s6, s2
+; GFX6-NEXT: s_subb_u32 s15, s7, s3
; GFX6-NEXT: s_ashr_i32 s6, s1, 31
; GFX6-NEXT: s_add_u32 s0, s0, s6
; GFX6-NEXT: s_mov_b32 s7, s6
@@ -8428,8 +8414,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9
-; GFX6-NEXT: s_sub_u32 s12, 0, s8
-; GFX6-NEXT: s_subb_u32 s13, 0, s9
+; GFX6-NEXT: s_sub_u32 s2, 0, s8
+; GFX6-NEXT: s_subb_u32 s3, 0, s9
; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -8438,128 +8424,121 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: v_readfirstlane_b32 s2, v0
-; GFX6-NEXT: s_mul_i32 s1, s12, s14
-; GFX6-NEXT: v_readfirstlane_b32 s3, v2
-; GFX6-NEXT: s_mul_i32 s0, s13, s2
-; GFX6-NEXT: s_add_i32 s1, s3, s1
-; GFX6-NEXT: s_add_i32 s3, s1, s0
-; GFX6-NEXT: s_mul_i32 s15, s12, s2
-; GFX6-NEXT: v_mul_hi_u32 v2, v0, s3
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s15
-; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GFX6-NEXT: s_mul_i32 s4, s2, s3
-; GFX6-NEXT: v_readfirstlane_b32 s5, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s2, v0
+; GFX6-NEXT: v_readfirstlane_b32 s12, v1
+; GFX6-NEXT: v_readfirstlane_b32 s0, v0
+; GFX6-NEXT: s_mul_i32 s13, s2, s12
+; GFX6-NEXT: v_readfirstlane_b32 s16, v2
+; GFX6-NEXT: s_mul_i32 s1, s3, s0
+; GFX6-NEXT: s_add_i32 s13, s16, s13
+; GFX6-NEXT: s_add_i32 s13, s13, s1
+; GFX6-NEXT: s_mul_i32 s1, s2, s0
+; GFX6-NEXT: v_mul_hi_u32 v2, v0, s13
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1
+; GFX6-NEXT: s_mul_i32 s16, s0, s13
+; GFX6-NEXT: v_readfirstlane_b32 s17, v2
; GFX6-NEXT: v_readfirstlane_b32 s18, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, v1, s15
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s3
-; GFX6-NEXT: s_add_u32 s4, s18, s4
-; GFX6-NEXT: s_addc_u32 s5, 0, s5
-; GFX6-NEXT: s_mul_i32 s15, s14, s15
+; GFX6-NEXT: v_mul_hi_u32 v0, v1, s1
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s13
+; GFX6-NEXT: s_add_u32 s16, s18, s16
+; GFX6-NEXT: s_addc_u32 s17, 0, s17
+; GFX6-NEXT: s_mul_i32 s1, s12, s1
; GFX6-NEXT: v_readfirstlane_b32 s18, v0
-; GFX6-NEXT: s_add_u32 s4, s4, s15
-; GFX6-NEXT: s_addc_u32 s4, s5, s18
-; GFX6-NEXT: v_readfirstlane_b32 s5, v1
-; GFX6-NEXT: s_addc_u32 s5, s5, 0
-; GFX6-NEXT: s_mul_i32 s3, s14, s3
-; GFX6-NEXT: s_add_u32 s3, s4, s3
-; GFX6-NEXT: s_addc_u32 s4, 0, s5
-; GFX6-NEXT: s_add_u32 s5, s2, s3
-; GFX6-NEXT: v_mov_b32_e32 v0, s5
-; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0
-; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_addc_u32 s4, s14, s4
-; GFX6-NEXT: s_mul_i32 s2, s12, s4
-; GFX6-NEXT: v_readfirstlane_b32 s3, v0
-; GFX6-NEXT: s_add_i32 s2, s3, s2
-; GFX6-NEXT: s_mul_i32 s13, s13, s5
-; GFX6-NEXT: s_mul_i32 s3, s12, s5
-; GFX6-NEXT: s_add_i32 s2, s2, s13
-; GFX6-NEXT: v_mov_b32_e32 v2, s3
-; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: s_add_u32 s1, s16, s1
+; GFX6-NEXT: s_addc_u32 s1, s17, s18
+; GFX6-NEXT: v_readfirstlane_b32 s16, v1
+; GFX6-NEXT: s_addc_u32 s16, s16, 0
+; GFX6-NEXT: s_mul_i32 s13, s12, s13
+; GFX6-NEXT: s_add_u32 s1, s1, s13
+; GFX6-NEXT: s_addc_u32 s13, 0, s16
+; GFX6-NEXT: s_add_u32 s16, s0, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s16
+; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
+; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX6-NEXT: s_addc_u32 s4, s12, s13
+; GFX6-NEXT: s_mul_i32 s5, s2, s4
+; GFX6-NEXT: v_readfirstlane_b32 s12, v0
+; GFX6-NEXT: s_add_i32 s5, s12, s5
+; GFX6-NEXT: s_mul_i32 s3, s3, s16
+; GFX6-NEXT: s_mul_i32 s2, s2, s16
+; GFX6-NEXT: s_add_i32 s3, s5, s3
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v0, s3
; GFX6-NEXT: v_mul_hi_u32 v3, s4, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s5, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s16, v2
; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX6-NEXT: s_mul_i32 s13, s5, s2
-; GFX6-NEXT: v_readfirstlane_b32 s15, v2
-; GFX6-NEXT: s_add_u32 s13, s15, s13
-; GFX6-NEXT: v_readfirstlane_b32 s14, v0
-; GFX6-NEXT: s_mul_i32 s3, s4, s3
-; GFX6-NEXT: s_addc_u32 s14, 0, s14
-; GFX6-NEXT: v_readfirstlane_b32 s12, v3
-; GFX6-NEXT: s_add_u32 s3, s13, s3
-; GFX6-NEXT: s_addc_u32 s3, s14, s12
-; GFX6-NEXT: v_readfirstlane_b32 s12, v1
-; GFX6-NEXT: s_addc_u32 s12, s12, 0
+; GFX6-NEXT: v_mul_hi_u32 v0, s16, v0
+; GFX6-NEXT: s_mul_i32 s12, s16, s3
+; GFX6-NEXT: v_readfirstlane_b32 s17, v2
+; GFX6-NEXT: s_add_u32 s12, s17, s12
+; GFX6-NEXT: v_readfirstlane_b32 s13, v0
; GFX6-NEXT: s_mul_i32 s2, s4, s2
-; GFX6-NEXT: s_add_u32 s2, s3, s2
-; GFX6-NEXT: s_addc_u32 s12, 0, s12
-; GFX6-NEXT: s_add_u32 s13, s5, s2
-; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_addc_u32 s12, s4, s12
+; GFX6-NEXT: s_addc_u32 s13, 0, s13
+; GFX6-NEXT: v_readfirstlane_b32 s5, v3
+; GFX6-NEXT: s_add_u32 s2, s12, s2
+; GFX6-NEXT: s_addc_u32 s2, s13, s5
+; GFX6-NEXT: v_readfirstlane_b32 s5, v1
+; GFX6-NEXT: s_addc_u32 s5, s5, 0
+; GFX6-NEXT: s_mul_i32 s3, s4, s3
+; GFX6-NEXT: s_add_u32 s2, s2, s3
+; GFX6-NEXT: s_addc_u32 s3, 0, s5
+; GFX6-NEXT: s_add_u32 s12, s16, s2
+; GFX6-NEXT: s_addc_u32 s13, s4, s3
; GFX6-NEXT: s_ashr_i32 s4, s11, 31
; GFX6-NEXT: s_add_u32 s2, s10, s4
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: s_addc_u32 s3, s11, s4
; GFX6-NEXT: s_xor_b64 s[10:11], s[2:3], s[4:5]
-; GFX6-NEXT: v_mov_b32_e32 v0, s12
+; GFX6-NEXT: v_mov_b32_e32 v0, s13
; GFX6-NEXT: v_mul_hi_u32 v1, s10, v0
-; GFX6-NEXT: v_mov_b32_e32 v2, s13
+; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: v_mul_hi_u32 v3, s10, v2
-; GFX6-NEXT: s_mul_i32 s2, s10, s12
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
+; GFX6-NEXT: s_mul_i32 s2, s10, s13
+; GFX6-NEXT: v_readfirstlane_b32 s16, v1
; GFX6-NEXT: v_mul_hi_u32 v1, s11, v2
-; GFX6-NEXT: v_readfirstlane_b32 s15, v3
+; GFX6-NEXT: v_readfirstlane_b32 s17, v3
; GFX6-NEXT: v_mul_hi_u32 v0, s11, v0
-; GFX6-NEXT: s_add_u32 s2, s15, s2
-; GFX6-NEXT: s_addc_u32 s14, 0, s14
-; GFX6-NEXT: s_mul_i32 s13, s11, s13
-; GFX6-NEXT: v_readfirstlane_b32 s15, v1
-; GFX6-NEXT: s_add_u32 s2, s2, s13
-; GFX6-NEXT: s_addc_u32 s2, s14, s15
-; GFX6-NEXT: v_readfirstlane_b32 s13, v0
-; GFX6-NEXT: s_addc_u32 s13, s13, 0
+; GFX6-NEXT: s_add_u32 s2, s17, s2
+; GFX6-NEXT: s_addc_u32 s16, 0, s16
; GFX6-NEXT: s_mul_i32 s12, s11, s12
-; GFX6-NEXT: s_add_u32 s18, s2, s12
-; GFX6-NEXT: v_mov_b32_e32 v0, s18
+; GFX6-NEXT: v_readfirstlane_b32 s17, v1
+; GFX6-NEXT: s_add_u32 s2, s2, s12
+; GFX6-NEXT: s_addc_u32 s2, s16, s17
+; GFX6-NEXT: v_readfirstlane_b32 s12, v0
+; GFX6-NEXT: s_addc_u32 s12, s12, 0
+; GFX6-NEXT: s_mul_i32 s13, s11, s13
+; GFX6-NEXT: s_add_u32 s16, s2, s13
+; GFX6-NEXT: v_mov_b32_e32 v0, s16
; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0
-; GFX6-NEXT: s_addc_u32 s19, 0, s13
-; GFX6-NEXT: s_mul_i32 s12, s8, s19
+; GFX6-NEXT: s_addc_u32 s17, 0, s12
+; GFX6-NEXT: s_mul_i32 s12, s8, s17
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_readfirstlane_b32 s13, v0
; GFX6-NEXT: s_add_i32 s12, s13, s12
-; GFX6-NEXT: s_mul_i32 s13, s9, s18
-; GFX6-NEXT: s_add_i32 s20, s12, s13
-; GFX6-NEXT: s_sub_i32 s14, s11, s20
-; GFX6-NEXT: s_mul_i32 s12, s8, s18
+; GFX6-NEXT: s_mul_i32 s13, s9, s16
+; GFX6-NEXT: s_add_i32 s18, s12, s13
+; GFX6-NEXT: s_sub_i32 s19, s11, s18
+; GFX6-NEXT: s_mul_i32 s12, s8, s16
; GFX6-NEXT: s_sub_u32 s10, s10, s12
; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s15, s12, s13
-; GFX6-NEXT: s_subb_u32 s21, s14, s9
-; GFX6-NEXT: s_sub_u32 s22, s10, s8
-; GFX6-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GFX6-NEXT: s_or_b32 s14, s14, s15
-; GFX6-NEXT: s_subb_u32 s14, s21, 0
-; GFX6-NEXT: s_cmp_ge_u32 s14, s9
-; GFX6-NEXT: s_cselect_b32 s15, -1, 0
-; GFX6-NEXT: s_cmp_ge_u32 s22, s8
+; GFX6-NEXT: s_subb_u32 s19, s19, s9
+; GFX6-NEXT: s_sub_u32 s20, s10, s8
+; GFX6-NEXT: s_subb_u32 s19, s19, 0
+; GFX6-NEXT: s_cmp_ge_u32 s19, s9
; GFX6-NEXT: s_cselect_b32 s21, -1, 0
-; GFX6-NEXT: s_cmp_eq_u32 s14, s9
-; GFX6-NEXT: s_cselect_b32 s14, s21, s15
-; GFX6-NEXT: s_add_u32 s15, s18, 1
-; GFX6-NEXT: s_addc_u32 s21, s19, 0
-; GFX6-NEXT: s_add_u32 s22, s18, 2
-; GFX6-NEXT: s_addc_u32 s23, s19, 0
-; GFX6-NEXT: s_cmp_lg_u32 s14, 0
-; GFX6-NEXT: s_cselect_b32 s14, s22, s15
-; GFX6-NEXT: s_cselect_b32 s15, s23, s21
+; GFX6-NEXT: s_cmp_ge_u32 s20, s8
+; GFX6-NEXT: s_cselect_b32 s20, -1, 0
+; GFX6-NEXT: s_cmp_eq_u32 s19, s9
+; GFX6-NEXT: s_cselect_b32 s19, s20, s21
+; GFX6-NEXT: s_add_u32 s20, s16, 1
+; GFX6-NEXT: s_addc_u32 s21, s17, 0
+; GFX6-NEXT: s_add_u32 s22, s16, 2
+; GFX6-NEXT: s_addc_u32 s23, s17, 0
+; GFX6-NEXT: s_cmp_lg_u32 s19, 0
+; GFX6-NEXT: s_cselect_b32 s19, s22, s20
+; GFX6-NEXT: s_cselect_b32 s20, s23, s21
; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_subb_u32 s11, s11, s20
+; GFX6-NEXT: s_subb_u32 s11, s11, s18
; GFX6-NEXT: s_cmp_ge_u32 s11, s9
; GFX6-NEXT: s_cselect_b32 s12, -1, 0
; GFX6-NEXT: s_cmp_ge_u32 s10, s8
@@ -8567,15 +8546,15 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_cmp_eq_u32 s11, s9
; GFX6-NEXT: s_cselect_b32 s8, s8, s12
; GFX6-NEXT: s_cmp_lg_u32 s8, 0
-; GFX6-NEXT: s_cselect_b32 s9, s15, s19
-; GFX6-NEXT: s_cselect_b32 s8, s14, s18
+; GFX6-NEXT: s_cselect_b32 s9, s20, s17
+; GFX6-NEXT: s_cselect_b32 s8, s19, s16
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
; GFX6-NEXT: s_xor_b64 s[6:7], s[8:9], s[4:5]
; GFX6-NEXT: s_sub_u32 s4, s6, s4
; GFX6-NEXT: s_subb_u32 s5, s7, s5
; GFX6-NEXT: s_mov_b32 s2, -1
-; GFX6-NEXT: v_mov_b32_e32 v0, s16
-; GFX6-NEXT: v_mov_b32_e32 v1, s17
+; GFX6-NEXT: v_mov_b32_e32 v0, s14
+; GFX6-NEXT: v_mov_b32_e32 v1, s15
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -9015,105 +8994,100 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_xor_b64 s[8:9], s[0:1], s[2:3]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9
-; GFX6-NEXT: s_sub_u32 s10, 0, s8
-; GFX6-NEXT: s_subb_u32 s11, 0, s9
+; GFX6-NEXT: s_sub_u32 s0, 0, s8
+; GFX6-NEXT: s_subb_u32 s1, 0, s9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
-; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s10, v0
-; GFX6-NEXT: v_readfirstlane_b32 s12, v1
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s1, s10, s12
-; GFX6-NEXT: v_readfirstlane_b32 s15, v2
-; GFX6-NEXT: s_mul_i32 s13, s11, s0
-; GFX6-NEXT: s_mul_i32 s14, s10, s0
-; GFX6-NEXT: s_add_i32 s1, s15, s1
-; GFX6-NEXT: v_mul_hi_u32 v3, v0, s14
-; GFX6-NEXT: s_add_i32 s1, s1, s13
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1
-; GFX6-NEXT: v_mul_hi_u32 v4, v1, s14
-; GFX6-NEXT: v_readfirstlane_b32 s13, v3
-; GFX6-NEXT: s_mul_i32 s15, s0, s1
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s1
-; GFX6-NEXT: s_add_u32 s13, s13, s15
-; GFX6-NEXT: v_readfirstlane_b32 s15, v0
-; GFX6-NEXT: s_addc_u32 s15, 0, s15
-; GFX6-NEXT: s_mul_i32 s14, s12, s14
-; GFX6-NEXT: v_readfirstlane_b32 s16, v4
-; GFX6-NEXT: s_add_u32 s13, s13, s14
-; GFX6-NEXT: s_addc_u32 s13, s15, s16
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: s_addc_u32 s14, s14, 0
-; GFX6-NEXT: s_mul_i32 s1, s12, s1
-; GFX6-NEXT: s_add_u32 s1, s13, s1
-; GFX6-NEXT: s_addc_u32 s13, 0, s14
-; GFX6-NEXT: s_add_u32 s14, s0, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s14
-; GFX6-NEXT: v_mul_hi_u32 v0, s10, v0
-; GFX6-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_addc_u32 s12, s12, s13
-; GFX6-NEXT: s_mul_i32 s0, s10, s12
-; GFX6-NEXT: v_readfirstlane_b32 s1, v0
-; GFX6-NEXT: s_add_i32 s0, s1, s0
-; GFX6-NEXT: s_mul_i32 s11, s11, s14
-; GFX6-NEXT: s_mul_i32 s1, s10, s14
-; GFX6-NEXT: s_add_i32 s0, s0, s11
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mul_hi_u32 v3, s12, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s14, v2
-; GFX6-NEXT: v_mul_hi_u32 v1, s12, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s14, v0
-; GFX6-NEXT: s_mul_i32 s11, s14, s0
-; GFX6-NEXT: v_readfirstlane_b32 s15, v2
-; GFX6-NEXT: s_add_u32 s11, s15, s11
-; GFX6-NEXT: v_readfirstlane_b32 s13, v0
-; GFX6-NEXT: s_mul_i32 s1, s12, s1
-; GFX6-NEXT: s_addc_u32 s13, 0, s13
-; GFX6-NEXT: v_readfirstlane_b32 s10, v3
-; GFX6-NEXT: s_add_u32 s1, s11, s1
-; GFX6-NEXT: s_addc_u32 s1, s13, s10
+; GFX6-NEXT: v_mul_hi_u32 v2, s0, v0
; GFX6-NEXT: v_readfirstlane_b32 s10, v1
-; GFX6-NEXT: s_addc_u32 s10, s10, 0
-; GFX6-NEXT: s_mul_i32 s0, s12, s0
-; GFX6-NEXT: s_add_u32 s0, s1, s0
-; GFX6-NEXT: s_addc_u32 s10, 0, s10
-; GFX6-NEXT: s_add_u32 s13, s14, s0
-; GFX6-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_addc_u32 s12, s12, s10
+; GFX6-NEXT: v_readfirstlane_b32 s2, v0
+; GFX6-NEXT: s_mul_i32 s11, s0, s10
+; GFX6-NEXT: v_readfirstlane_b32 s14, v2
+; GFX6-NEXT: s_mul_i32 s12, s1, s2
+; GFX6-NEXT: s_mul_i32 s13, s0, s2
+; GFX6-NEXT: s_add_i32 s11, s14, s11
+; GFX6-NEXT: v_mul_hi_u32 v3, v0, s13
+; GFX6-NEXT: s_add_i32 s11, s11, s12
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s11
+; GFX6-NEXT: v_mul_hi_u32 v4, v1, s13
+; GFX6-NEXT: v_readfirstlane_b32 s12, v3
+; GFX6-NEXT: s_mul_i32 s14, s2, s11
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s11
+; GFX6-NEXT: s_add_u32 s12, s12, s14
+; GFX6-NEXT: v_readfirstlane_b32 s14, v0
+; GFX6-NEXT: s_mul_i32 s13, s10, s13
+; GFX6-NEXT: s_addc_u32 s14, 0, s14
+; GFX6-NEXT: v_readfirstlane_b32 s15, v4
+; GFX6-NEXT: s_add_u32 s12, s12, s13
+; GFX6-NEXT: s_addc_u32 s12, s14, s15
+; GFX6-NEXT: v_readfirstlane_b32 s13, v1
+; GFX6-NEXT: s_addc_u32 s13, s13, 0
+; GFX6-NEXT: s_mul_i32 s11, s10, s11
+; GFX6-NEXT: s_add_u32 s11, s12, s11
+; GFX6-NEXT: s_addc_u32 s12, 0, s13
+; GFX6-NEXT: s_add_u32 s11, s2, s11
+; GFX6-NEXT: v_mov_b32_e32 v0, s11
+; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
+; GFX6-NEXT: s_addc_u32 s10, s10, s12
+; GFX6-NEXT: s_mul_i32 s12, s0, s10
+; GFX6-NEXT: s_mul_i32 s1, s1, s11
+; GFX6-NEXT: v_readfirstlane_b32 s13, v0
+; GFX6-NEXT: s_add_i32 s12, s13, s12
+; GFX6-NEXT: s_mul_i32 s0, s0, s11
+; GFX6-NEXT: s_add_i32 s1, s12, s1
+; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: v_mov_b32_e32 v0, s1
+; GFX6-NEXT: v_mul_hi_u32 v3, s10, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s11, v2
+; GFX6-NEXT: v_mul_hi_u32 v1, s10, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, s11, v0
+; GFX6-NEXT: s_mul_i32 s13, s11, s1
+; GFX6-NEXT: v_readfirstlane_b32 s15, v2
+; GFX6-NEXT: s_add_u32 s13, s15, s13
+; GFX6-NEXT: v_readfirstlane_b32 s14, v0
+; GFX6-NEXT: s_mul_i32 s0, s10, s0
+; GFX6-NEXT: s_addc_u32 s14, 0, s14
+; GFX6-NEXT: v_readfirstlane_b32 s12, v3
+; GFX6-NEXT: s_add_u32 s0, s13, s0
+; GFX6-NEXT: s_addc_u32 s0, s14, s12
+; GFX6-NEXT: v_readfirstlane_b32 s12, v1
+; GFX6-NEXT: s_addc_u32 s12, s12, 0
+; GFX6-NEXT: s_mul_i32 s1, s10, s1
+; GFX6-NEXT: s_add_u32 s0, s0, s1
+; GFX6-NEXT: s_addc_u32 s1, 0, s12
+; GFX6-NEXT: s_add_u32 s12, s11, s0
+; GFX6-NEXT: s_addc_u32 s13, s10, s1
; GFX6-NEXT: s_ashr_i32 s10, s7, 31
; GFX6-NEXT: s_add_u32 s0, s6, s10
; GFX6-NEXT: s_mov_b32 s11, s10
; GFX6-NEXT: s_addc_u32 s1, s7, s10
; GFX6-NEXT: s_xor_b64 s[6:7], s[0:1], s[10:11]
-; GFX6-NEXT: v_mov_b32_e32 v0, s12
+; GFX6-NEXT: v_mov_b32_e32 v0, s13
; GFX6-NEXT: v_mul_hi_u32 v1, s6, v0
-; GFX6-NEXT: v_mov_b32_e32 v2, s13
+; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: v_mul_hi_u32 v3, s6, v2
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: v_readfirstlane_b32 s4, v1
; GFX6-NEXT: v_mul_hi_u32 v1, s7, v2
-; GFX6-NEXT: s_mul_i32 s1, s6, s12
+; GFX6-NEXT: s_mul_i32 s1, s6, s13
; GFX6-NEXT: v_readfirstlane_b32 s14, v3
; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0
; GFX6-NEXT: s_add_u32 s1, s14, s1
; GFX6-NEXT: s_addc_u32 s4, 0, s4
-; GFX6-NEXT: s_mul_i32 s13, s7, s13
+; GFX6-NEXT: s_mul_i32 s12, s7, s12
; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: s_add_u32 s1, s1, s13
+; GFX6-NEXT: s_add_u32 s1, s1, s12
; GFX6-NEXT: s_addc_u32 s1, s4, s14
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: s_addc_u32 s4, s4, 0
-; GFX6-NEXT: s_mul_i32 s12, s7, s12
+; GFX6-NEXT: s_mul_i32 s12, s7, s13
; GFX6-NEXT: s_add_u32 s12, s1, s12
; GFX6-NEXT: v_mov_b32_e32 v0, s12
; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0
@@ -9128,11 +9102,9 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_mul_i32 s4, s8, s12
; GFX6-NEXT: s_sub_u32 s6, s6, s4
; GFX6-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX6-NEXT: s_or_b32 s12, s4, s5
; GFX6-NEXT: s_subb_u32 s15, s13, s9
; GFX6-NEXT: s_sub_u32 s16, s6, s8
; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s17, s12, s13
; GFX6-NEXT: s_subb_u32 s17, s15, 0
; GFX6-NEXT: s_cmp_ge_u32 s17, s9
; GFX6-NEXT: s_cselect_b32 s18, -1, 0
@@ -9141,13 +9113,11 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_cmp_eq_u32 s17, s9
; GFX6-NEXT: s_cselect_b32 s18, s19, s18
; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_subb_u32 s15, s15, s9
-; GFX6-NEXT: s_sub_u32 s19, s16, s8
-; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_subb_u32 s12, s15, 0
+; GFX6-NEXT: s_subb_u32 s12, s15, s9
+; GFX6-NEXT: s_sub_u32 s13, s16, s8
+; GFX6-NEXT: s_subb_u32 s12, s12, 0
; GFX6-NEXT: s_cmp_lg_u32 s18, 0
-; GFX6-NEXT: s_cselect_b32 s13, s19, s16
+; GFX6-NEXT: s_cselect_b32 s13, s13, s16
; GFX6-NEXT: s_cselect_b32 s12, s12, s17
; GFX6-NEXT: s_or_b32 s4, s4, s5
; GFX6-NEXT: s_subb_u32 s4, s7, s14
@@ -9164,6 +9134,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_sub_u32 s4, s4, s10
; GFX6-NEXT: s_subb_u32 s5, s5, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
@@ -9405,8 +9376,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s3
-; GFX6-NEXT: s_sub_u32 s12, 0, s2
-; GFX6-NEXT: s_subb_u32 s13, 0, s3
+; GFX6-NEXT: s_sub_u32 s6, 0, s2
+; GFX6-NEXT: s_subb_u32 s7, 0, s3
; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -9415,69 +9386,65 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0
-; GFX6-NEXT: v_readfirstlane_b32 s14, v1
-; GFX6-NEXT: v_readfirstlane_b32 s6, v0
-; GFX6-NEXT: s_mul_i32 s7, s12, s14
+; GFX6-NEXT: v_mul_hi_u32 v2, s6, v0
+; GFX6-NEXT: v_readfirstlane_b32 s12, v1
+; GFX6-NEXT: v_readfirstlane_b32 s13, v0
+; GFX6-NEXT: s_mul_i32 s14, s6, s12
; GFX6-NEXT: v_readfirstlane_b32 s17, v2
-; GFX6-NEXT: s_mul_i32 s15, s13, s6
-; GFX6-NEXT: s_mul_i32 s16, s12, s6
-; GFX6-NEXT: s_add_i32 s7, s17, s7
+; GFX6-NEXT: s_mul_i32 s15, s7, s13
+; GFX6-NEXT: s_mul_i32 s16, s6, s13
+; GFX6-NEXT: s_add_i32 s14, s17, s14
; GFX6-NEXT: v_mul_hi_u32 v3, v0, s16
-; GFX6-NEXT: s_add_i32 s7, s7, s15
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s7
+; GFX6-NEXT: s_add_i32 s14, s14, s15
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s14
; GFX6-NEXT: v_mul_hi_u32 v4, v1, s16
; GFX6-NEXT: v_readfirstlane_b32 s15, v3
-; GFX6-NEXT: s_mul_i32 s18, s6, s7
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s7
+; GFX6-NEXT: s_mul_i32 s18, s13, s14
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s14
; GFX6-NEXT: s_add_u32 s15, s15, s18
; GFX6-NEXT: v_readfirstlane_b32 s18, v0
-; GFX6-NEXT: s_mul_i32 s16, s14, s16
+; GFX6-NEXT: s_mul_i32 s16, s12, s16
; GFX6-NEXT: s_addc_u32 s18, 0, s18
; GFX6-NEXT: v_readfirstlane_b32 s17, v4
; GFX6-NEXT: s_add_u32 s15, s15, s16
; GFX6-NEXT: s_addc_u32 s15, s18, s17
; GFX6-NEXT: v_readfirstlane_b32 s16, v1
; GFX6-NEXT: s_addc_u32 s16, s16, 0
-; GFX6-NEXT: s_mul_i32 s7, s14, s7
-; GFX6-NEXT: s_add_u32 s7, s15, s7
+; GFX6-NEXT: s_mul_i32 s14, s12, s14
+; GFX6-NEXT: s_add_u32 s14, s15, s14
; GFX6-NEXT: s_addc_u32 s15, 0, s16
-; GFX6-NEXT: s_add_u32 s16, s6, s7
-; GFX6-NEXT: v_mov_b32_e32 v0, s16
-; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0
-; GFX6-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GFX6-NEXT: s_or_b32 s6, s6, s7
-; GFX6-NEXT: s_addc_u32 s14, s14, s15
-; GFX6-NEXT: s_mul_i32 s6, s12, s14
-; GFX6-NEXT: v_readfirstlane_b32 s7, v0
-; GFX6-NEXT: s_add_i32 s6, s7, s6
-; GFX6-NEXT: s_mul_i32 s13, s13, s16
-; GFX6-NEXT: s_mul_i32 s7, s12, s16
-; GFX6-NEXT: s_add_i32 s6, s6, s13
-; GFX6-NEXT: v_mov_b32_e32 v2, s7
-; GFX6-NEXT: v_mov_b32_e32 v0, s6
-; GFX6-NEXT: v_mul_hi_u32 v3, s14, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s16, v2
-; GFX6-NEXT: v_mul_hi_u32 v1, s14, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s16, v0
-; GFX6-NEXT: s_mul_i32 s13, s16, s6
-; GFX6-NEXT: v_readfirstlane_b32 s17, v2
-; GFX6-NEXT: s_add_u32 s13, s17, s13
+; GFX6-NEXT: s_add_u32 s13, s13, s14
+; GFX6-NEXT: v_mov_b32_e32 v0, s13
+; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0
+; GFX6-NEXT: s_addc_u32 s12, s12, s15
+; GFX6-NEXT: s_mul_i32 s14, s6, s12
+; GFX6-NEXT: s_mul_i32 s7, s7, s13
; GFX6-NEXT: v_readfirstlane_b32 s15, v0
-; GFX6-NEXT: s_mul_i32 s7, s14, s7
-; GFX6-NEXT: s_addc_u32 s15, 0, s15
-; GFX6-NEXT: v_readfirstlane_b32 s12, v3
-; GFX6-NEXT: s_add_u32 s7, s13, s7
-; GFX6-NEXT: s_addc_u32 s7, s15, s12
-; GFX6-NEXT: v_readfirstlane_b32 s12, v1
-; GFX6-NEXT: s_addc_u32 s12, s12, 0
-; GFX6-NEXT: s_mul_i32 s6, s14, s6
-; GFX6-NEXT: s_add_u32 s6, s7, s6
-; GFX6-NEXT: s_addc_u32 s12, 0, s12
-; GFX6-NEXT: s_add_u32 s13, s16, s6
-; GFX6-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GFX6-NEXT: s_or_b32 s6, s6, s7
-; GFX6-NEXT: s_addc_u32 s12, s14, s12
+; GFX6-NEXT: s_add_i32 s14, s15, s14
+; GFX6-NEXT: s_mul_i32 s6, s6, s13
+; GFX6-NEXT: s_add_i32 s7, s14, s7
+; GFX6-NEXT: v_mov_b32_e32 v2, s6
+; GFX6-NEXT: v_mov_b32_e32 v0, s7
+; GFX6-NEXT: v_mul_hi_u32 v3, s12, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s13, v2
+; GFX6-NEXT: v_mul_hi_u32 v1, s12, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, s13, v0
+; GFX6-NEXT: s_mul_i32 s15, s13, s7
+; GFX6-NEXT: v_readfirstlane_b32 s17, v2
+; GFX6-NEXT: s_add_u32 s15, s17, s15
+; GFX6-NEXT: v_readfirstlane_b32 s16, v0
+; GFX6-NEXT: s_mul_i32 s6, s12, s6
+; GFX6-NEXT: s_addc_u32 s16, 0, s16
+; GFX6-NEXT: v_readfirstlane_b32 s14, v3
+; GFX6-NEXT: s_add_u32 s6, s15, s6
+; GFX6-NEXT: s_addc_u32 s6, s16, s14
+; GFX6-NEXT: v_readfirstlane_b32 s14, v1
+; GFX6-NEXT: s_addc_u32 s14, s14, 0
+; GFX6-NEXT: s_mul_i32 s7, s12, s7
+; GFX6-NEXT: s_add_u32 s6, s6, s7
+; GFX6-NEXT: s_addc_u32 s7, 0, s14
+; GFX6-NEXT: s_add_u32 s13, s13, s6
+; GFX6-NEXT: s_addc_u32 s12, s12, s7
; GFX6-NEXT: s_ashr_i32 s6, s9, 31
; GFX6-NEXT: s_add_u32 s8, s8, s6
; GFX6-NEXT: s_mov_b32 s7, s6
@@ -9514,11 +9481,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_mul_i32 s12, s2, s12
; GFX6-NEXT: s_sub_u32 s8, s8, s12
; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s15, s12, s13
; GFX6-NEXT: s_subb_u32 s17, s14, s3
; GFX6-NEXT: s_sub_u32 s18, s8, s2
; GFX6-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GFX6-NEXT: s_or_b32 s19, s14, s15
; GFX6-NEXT: s_subb_u32 s19, s17, 0
; GFX6-NEXT: s_cmp_ge_u32 s19, s3
; GFX6-NEXT: s_cselect_b32 s20, -1, 0
@@ -9527,13 +9492,11 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_cmp_eq_u32 s19, s3
; GFX6-NEXT: s_cselect_b32 s20, s21, s20
; GFX6-NEXT: s_or_b32 s14, s14, s15
-; GFX6-NEXT: s_subb_u32 s17, s17, s3
-; GFX6-NEXT: s_sub_u32 s21, s18, s2
-; GFX6-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GFX6-NEXT: s_or_b32 s14, s14, s15
-; GFX6-NEXT: s_subb_u32 s14, s17, 0
+; GFX6-NEXT: s_subb_u32 s14, s17, s3
+; GFX6-NEXT: s_sub_u32 s15, s18, s2
+; GFX6-NEXT: s_subb_u32 s14, s14, 0
; GFX6-NEXT: s_cmp_lg_u32 s20, 0
-; GFX6-NEXT: s_cselect_b32 s15, s21, s18
+; GFX6-NEXT: s_cselect_b32 s15, s15, s18
; GFX6-NEXT: s_cselect_b32 s14, s14, s19
; GFX6-NEXT: s_or_b32 s12, s12, s13
; GFX6-NEXT: s_subb_u32 s9, s9, s16
@@ -9556,8 +9519,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_xor_b64 s[6:7], s[0:1], s[2:3]
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s7
-; GFX6-NEXT: s_sub_u32 s8, 0, s6
-; GFX6-NEXT: s_subb_u32 s9, 0, s7
+; GFX6-NEXT: s_sub_u32 s2, 0, s6
+; GFX6-NEXT: s_subb_u32 s3, 0, s7
; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -9566,70 +9529,66 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, s8, v0
-; GFX6-NEXT: v_readfirstlane_b32 s12, v1
-; GFX6-NEXT: v_readfirstlane_b32 s2, v0
-; GFX6-NEXT: s_mul_i32 s1, s8, s12
-; GFX6-NEXT: v_readfirstlane_b32 s3, v2
-; GFX6-NEXT: s_mul_i32 s0, s9, s2
-; GFX6-NEXT: s_add_i32 s1, s3, s1
-; GFX6-NEXT: s_add_i32 s3, s1, s0
-; GFX6-NEXT: s_mul_i32 s13, s8, s2
-; GFX6-NEXT: v_mul_hi_u32 v2, v0, s3
-; GFX6-NEXT: v_mul_hi_u32 v0, v0, s13
-; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GFX6-NEXT: s_mul_i32 s4, s2, s3
-; GFX6-NEXT: v_readfirstlane_b32 s5, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s2, v0
+; GFX6-NEXT: v_readfirstlane_b32 s8, v1
+; GFX6-NEXT: v_readfirstlane_b32 s0, v0
+; GFX6-NEXT: s_mul_i32 s9, s2, s8
+; GFX6-NEXT: v_readfirstlane_b32 s12, v2
+; GFX6-NEXT: s_mul_i32 s1, s3, s0
+; GFX6-NEXT: s_add_i32 s9, s12, s9
+; GFX6-NEXT: s_add_i32 s9, s9, s1
+; GFX6-NEXT: s_mul_i32 s1, s2, s0
+; GFX6-NEXT: v_mul_hi_u32 v2, v0, s9
+; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1
+; GFX6-NEXT: s_mul_i32 s12, s0, s9
+; GFX6-NEXT: v_readfirstlane_b32 s13, v2
; GFX6-NEXT: v_readfirstlane_b32 s16, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, v1, s13
-; GFX6-NEXT: v_mul_hi_u32 v1, v1, s3
-; GFX6-NEXT: s_add_u32 s4, s16, s4
-; GFX6-NEXT: s_addc_u32 s5, 0, s5
-; GFX6-NEXT: s_mul_i32 s13, s12, s13
+; GFX6-NEXT: v_mul_hi_u32 v0, v1, s1
+; GFX6-NEXT: v_mul_hi_u32 v1, v1, s9
+; GFX6-NEXT: s_add_u32 s12, s16, s12
+; GFX6-NEXT: s_addc_u32 s13, 0, s13
+; GFX6-NEXT: s_mul_i32 s1, s8, s1
; GFX6-NEXT: v_readfirstlane_b32 s16, v0
-; GFX6-NEXT: s_add_u32 s4, s4, s13
-; GFX6-NEXT: s_addc_u32 s4, s5, s16
-; GFX6-NEXT: v_readfirstlane_b32 s5, v1
-; GFX6-NEXT: s_addc_u32 s5, s5, 0
-; GFX6-NEXT: s_mul_i32 s3, s12, s3
-; GFX6-NEXT: s_add_u32 s3, s4, s3
-; GFX6-NEXT: s_addc_u32 s4, 0, s5
-; GFX6-NEXT: s_add_u32 s5, s2, s3
-; GFX6-NEXT: v_mov_b32_e32 v0, s5
-; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0
-; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_addc_u32 s4, s12, s4
-; GFX6-NEXT: s_mul_i32 s2, s8, s4
-; GFX6-NEXT: v_readfirstlane_b32 s3, v0
-; GFX6-NEXT: s_add_i32 s2, s3, s2
-; GFX6-NEXT: s_mul_i32 s9, s9, s5
-; GFX6-NEXT: s_mul_i32 s3, s8, s5
-; GFX6-NEXT: s_add_i32 s2, s2, s9
-; GFX6-NEXT: v_mov_b32_e32 v2, s3
-; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: s_add_u32 s1, s12, s1
+; GFX6-NEXT: s_addc_u32 s1, s13, s16
+; GFX6-NEXT: v_readfirstlane_b32 s12, v1
+; GFX6-NEXT: s_addc_u32 s12, s12, 0
+; GFX6-NEXT: s_mul_i32 s9, s8, s9
+; GFX6-NEXT: s_add_u32 s1, s1, s9
+; GFX6-NEXT: s_addc_u32 s9, 0, s12
+; GFX6-NEXT: s_add_u32 s12, s0, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s12
+; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
+; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX6-NEXT: s_addc_u32 s4, s8, s9
+; GFX6-NEXT: s_mul_i32 s5, s2, s4
+; GFX6-NEXT: v_readfirstlane_b32 s8, v0
+; GFX6-NEXT: s_add_i32 s5, s8, s5
+; GFX6-NEXT: s_mul_i32 s3, s3, s12
+; GFX6-NEXT: s_mul_i32 s2, s2, s12
+; GFX6-NEXT: s_add_i32 s3, s5, s3
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
+; GFX6-NEXT: v_mov_b32_e32 v0, s3
; GFX6-NEXT: v_mul_hi_u32 v3, s4, v2
-; GFX6-NEXT: v_mul_hi_u32 v2, s5, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, s12, v2
; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
-; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX6-NEXT: s_mul_i32 s9, s5, s2
+; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0
+; GFX6-NEXT: s_mul_i32 s8, s12, s3
; GFX6-NEXT: v_readfirstlane_b32 s13, v2
-; GFX6-NEXT: s_add_u32 s9, s13, s9
-; GFX6-NEXT: v_readfirstlane_b32 s12, v0
-; GFX6-NEXT: s_mul_i32 s3, s4, s3
-; GFX6-NEXT: s_addc_u32 s12, 0, s12
-; GFX6-NEXT: v_readfirstlane_b32 s8, v3
-; GFX6-NEXT: s_add_u32 s3, s9, s3
-; GFX6-NEXT: s_addc_u32 s3, s12, s8
-; GFX6-NEXT: v_readfirstlane_b32 s8, v1
-; GFX6-NEXT: s_addc_u32 s8, s8, 0
+; GFX6-NEXT: s_add_u32 s8, s13, s8
+; GFX6-NEXT: v_readfirstlane_b32 s9, v0
; GFX6-NEXT: s_mul_i32 s2, s4, s2
-; GFX6-NEXT: s_add_u32 s2, s3, s2
-; GFX6-NEXT: s_addc_u32 s8, 0, s8
-; GFX6-NEXT: s_add_u32 s12, s5, s2
-; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_addc_u32 s13, s4, s8
+; GFX6-NEXT: s_addc_u32 s9, 0, s9
+; GFX6-NEXT: v_readfirstlane_b32 s5, v3
+; GFX6-NEXT: s_add_u32 s2, s8, s2
+; GFX6-NEXT: s_addc_u32 s2, s9, s5
+; GFX6-NEXT: v_readfirstlane_b32 s5, v1
+; GFX6-NEXT: s_addc_u32 s5, s5, 0
+; GFX6-NEXT: s_mul_i32 s3, s4, s3
+; GFX6-NEXT: s_add_u32 s2, s2, s3
+; GFX6-NEXT: s_addc_u32 s3, 0, s5
+; GFX6-NEXT: s_add_u32 s12, s12, s2
+; GFX6-NEXT: s_addc_u32 s13, s4, s3
; GFX6-NEXT: s_ashr_i32 s4, s11, 31
; GFX6-NEXT: s_add_u32 s2, s10, s4
; GFX6-NEXT: s_mov_b32 s5, s4
@@ -9667,11 +9626,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_mul_i32 s10, s6, s11
; GFX6-NEXT: s_sub_u32 s8, s8, s10
; GFX6-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GFX6-NEXT: s_or_b32 s13, s10, s11
; GFX6-NEXT: s_subb_u32 s17, s12, s7
; GFX6-NEXT: s_sub_u32 s18, s8, s6
; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s19, s12, s13
; GFX6-NEXT: s_subb_u32 s19, s17, 0
; GFX6-NEXT: s_cmp_ge_u32 s19, s7
; GFX6-NEXT: s_cselect_b32 s20, -1, 0
@@ -9680,13 +9637,11 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
; GFX6-NEXT: s_cmp_eq_u32 s19, s7
; GFX6-NEXT: s_cselect_b32 s20, s21, s20
; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_subb_u32 s17, s17, s7
-; GFX6-NEXT: s_sub_u32 s21, s18, s6
-; GFX6-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GFX6-NEXT: s_or_b32 s12, s12, s13
-; GFX6-NEXT: s_subb_u32 s12, s17, 0
+; GFX6-NEXT: s_subb_u32 s12, s17, s7
+; GFX6-NEXT: s_sub_u32 s13, s18, s6
+; GFX6-NEXT: s_subb_u32 s12, s12, 0
; GFX6-NEXT: s_cmp_lg_u32 s20, 0
-; GFX6-NEXT: s_cselect_b32 s13, s21, s18
+; GFX6-NEXT: s_cselect_b32 s13, s13, s18
; GFX6-NEXT: s_cselect_b32 s12, s12, s19
; GFX6-NEXT: s_or_b32 s10, s10, s11
; GFX6-NEXT: s_subb_u32 s9, s9, s16
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
index c962c05d24ad..5d79696572cf 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll
@@ -239,7 +239,8 @@ define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1)
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]]
; PASS-CHECK: [[IF]]:
-; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, 0
+; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 0)
+; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, [[FIRST_ACTIVE_ID]]
; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
; PASS-CHECK: [[WORK]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
@@ -308,7 +309,8 @@ define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i3
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]]
; PASS-CHECK: [[IF]]:
-; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[MYMASK]]
+; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[MYMASK]])
+; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[FIRST_ACTIVE_ID]]
; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
; PASS-CHECK: [[WORK]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
index a7e828c95d69..402ccd91fed8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll
@@ -248,12 +248,14 @@ define amdgpu_kernel void @readfirstlane_constant(ptr addrspace(1) %out) {
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_constant(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
-; PASS-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 7)
+; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_constant(
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
-; DCE-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 7)
+; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; DCE-CHECK-NEXT: ret void
;
%v = call i32 @llvm.amdgcn.readfirstlane(i32 7)
@@ -269,12 +271,14 @@ define amdgpu_kernel void @readfirstlane_with_argument(ptr addrspace(1) %out, i3
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_argument(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]]) #[[ATTR0]] {
-; PASS-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0]])
+; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_argument(
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]]) #[[ATTR0]] {
-; DCE-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0]])
+; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; DCE-CHECK-NEXT: ret void
;
%v = call i32 @llvm.amdgcn.readfirstlane(i32 %src0)
@@ -360,12 +364,16 @@ define amdgpu_kernel void @readfirstlane_with_readfirstlane(ptr addrspace(1) %ou
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readfirstlane(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
-; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 5)
+; PASS-CHECK-NEXT: [[V2:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[V1]])
+; PASS-CHECK-NEXT: store i32 [[V2]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readfirstlane(
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
-; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 5)
+; DCE-CHECK-NEXT: [[V2:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[V1]])
+; DCE-CHECK-NEXT: store i32 [[V2]], ptr addrspace(1) [[OUT]], align 4
; DCE-CHECK-NEXT: ret void
;
%v1 = call i32 @llvm.amdgcn.readfirstlane(i32 5)
@@ -388,7 +396,8 @@ define amdgpu_kernel void @readfirstlane_with_readlane(ptr addrspace(1) %out) {
; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; PASS-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; PASS-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]])
-; PASS-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: [[V2:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[V1]])
+; PASS-CHECK-NEXT: store i32 [[V2]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readlane(
@@ -396,7 +405,8 @@ define amdgpu_kernel void @readfirstlane_with_readlane(ptr addrspace(1) %out) {
; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; DCE-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; DCE-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]])
-; DCE-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: [[V2:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[V1]])
+; DCE-CHECK-NEXT: store i32 [[V2]], ptr addrspace(1) [[OUT]], align 4
; DCE-CHECK-NEXT: ret void
;
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
@@ -537,13 +547,15 @@ define amdgpu_kernel void @readfirstlane_random(ptr addrspace(1) %out) {
; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_random(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[RANDOM:%.*]] = xor i32 123, 456
-; PASS-CHECK-NEXT: store i32 [[RANDOM]], ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[RANDOM]])
+; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_random(
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: [[RANDOM:%.*]] = xor i32 123, 456
-; DCE-CHECK-NEXT: store i32 [[RANDOM]], ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[RANDOM]])
+; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
; DCE-CHECK-NEXT: ret void
;
%random = xor i32 123, 456
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll
index f8655a702180..f465e3c505c0 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll
@@ -280,7 +280,7 @@ bb0:
br i1 %tmp, label %bb2, label %bb3
bb2:
- store volatile i32 17, ptr addrspace(1) undef
+ store volatile i32 17, ptr addrspace(1) poison
br label %bb4
bb3:
@@ -375,7 +375,7 @@ bb0:
br i1 %cmp0, label %bb2, label %bb1
bb1:
- %val = load volatile i32, ptr addrspace(4) undef
+ %val = load volatile i32, ptr addrspace(4) poison
%cmp1 = icmp eq i32 %val, 3
br i1 %cmp1, label %bb3, label %bb2
@@ -512,7 +512,7 @@ loop_body:
br label %loop
ret:
- store volatile i32 7, ptr addrspace(1) undef
+ store volatile i32 7, ptr addrspace(1) poison
ret void
}
@@ -622,7 +622,7 @@ bb14: ; preds = %bb13, %bb9
br label %bb19
bb19: ; preds = %bb14, %bb13, %bb9
- %tmp20 = phi i32 [ undef, %bb9 ], [ undef, %bb13 ], [ %tmp18, %bb14 ]
+ %tmp20 = phi i32 [ poison, %bb9 ], [ poison, %bb13 ], [ %tmp18, %bb14 ]
%tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %arg5
store i32 %tmp20, ptr addrspace(1) %tmp21, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll
new file mode 100644
index 000000000000..04e472419ca6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+
+; We've separated this file from call-args-inreg.ll since GlobalISel does not support the bfloat type.
+; Ideally, we should merge the two files once that support lands.
+
+declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
+declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
+
+define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s17, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-NEXT: v_writelane_b32 v40, s17, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s0, s16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s4, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b32 s33, s4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s1, s33
+; GFX11-NEXT: s_mov_b32 s33, s32
+; GFX11-NEXT: s_or_saveexec_b32 s2, -1
+; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s2
+; GFX11-NEXT: v_writelane_b32 v40, s1, 2
+; GFX11-NEXT: s_add_i32 s32, s32, 16
+; GFX11-NEXT: s_getpc_b64 s[2:3]
+; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
+; GFX11-NEXT: v_writelane_b32 v40, s30, 0
+; GFX11-NEXT: v_writelane_b32 v40, s31, 1
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_readlane_b32 s31, v40, 1
+; GFX11-NEXT: v_readlane_b32 s30, v40, 0
+; GFX11-NEXT: s_mov_b32 s32, s33
+; GFX11-NEXT: v_readlane_b32 s0, v40, 2
+; GFX11-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-NEXT: s_mov_b32 s33, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ call void @external_void_func_bf16_inreg(bfloat inreg %arg)
+ ret void
+}
+
+define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s17, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-NEXT: v_writelane_b32 v40, s17, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s0, s16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s4, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b32 s33, s4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s1, s33
+; GFX11-NEXT: s_mov_b32 s33, s32
+; GFX11-NEXT: s_or_saveexec_b32 s2, -1
+; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s2
+; GFX11-NEXT: v_writelane_b32 v40, s1, 2
+; GFX11-NEXT: s_add_i32 s32, s32, 16
+; GFX11-NEXT: s_getpc_b64 s[2:3]
+; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
+; GFX11-NEXT: v_writelane_b32 v40, s30, 0
+; GFX11-NEXT: v_writelane_b32 v40, s31, 1
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_readlane_b32 s31, v40, 1
+; GFX11-NEXT: v_readlane_b32 s30, v40, 0
+; GFX11-NEXT: s_mov_b32 s32, s33
+; GFX11-NEXT: v_readlane_b32 s0, v40, 2
+; GFX11-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-NEXT: s_mov_b32 s33, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index d1cede64ce71..f96007ae513b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
declare hidden void @external_void_func_i16_inreg(i32 inreg) #0
@@ -12,11 +14,9 @@ declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
declare hidden void @external_void_func_f16_inreg(half inreg) #0
-declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
declare hidden void @external_void_func_f32_inreg(float inreg) #0
declare hidden void @external_void_func_f64_inreg(double inreg) #0
declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
-declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
@@ -212,35 +212,6 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
}
define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_i64_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -273,35 +244,6 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
}
define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -334,36 +276,6 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
}
define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v3i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s19, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s19, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[20:21]
-; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v3i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -396,37 +308,6 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
}
define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s20, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-NEXT: v_writelane_b32 v40, s20, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[20:21]
-; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s19
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -459,41 +340,6 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
}
define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s24, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[26:27], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[26:27]
-; GFX9-NEXT: v_writelane_b32 v40, s24, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[24:25]
-; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s19
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: s_mov_b32 s16, s20
-; GFX9-NEXT: s_mov_b32 s17, s21
-; GFX9-NEXT: s_mov_b32 s18, s22
-; GFX9-NEXT: s_mov_b32 s19, s23
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -585,66 +431,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s17, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-NEXT: v_writelane_b32 v40, s17, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s1, s33
-; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_or_saveexec_b32 s2, -1
-; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s2
-; GFX11-NEXT: v_writelane_b32 v40, s1, 2
-; GFX11-NEXT: s_add_i32 s32, s32, 16
-; GFX11-NEXT: s_getpc_b64 s[2:3]
-; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
-; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
-; GFX11-NEXT: v_writelane_b32 v40, s30, 0
-; GFX11-NEXT: v_writelane_b32 v40, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s31, v40, 1
-; GFX11-NEXT: v_readlane_b32 s30, v40, 0
-; GFX11-NEXT: s_mov_b32 s32, s33
-; GFX11-NEXT: v_readlane_b32 s0, v40, 2
-; GFX11-NEXT: s_or_saveexec_b32 s1, -1
-; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
-; GFX11-NEXT: s_mov_b32 s33, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @external_void_func_bf16_inreg(bfloat inreg %arg)
- ret void
-}
-
define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_f32_inreg:
; GFX9: ; %bb.0:
@@ -706,35 +492,6 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
}
define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_f64_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_f64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -826,97 +583,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
ret void
}
-
-define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s17, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-NEXT: v_writelane_b32 v40, s17, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s1, s33
-; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_or_saveexec_b32 s2, -1
-; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s2
-; GFX11-NEXT: v_writelane_b32 v40, s1, 2
-; GFX11-NEXT: s_add_i32 s32, s32, 16
-; GFX11-NEXT: s_getpc_b64 s[2:3]
-; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
-; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
-; GFX11-NEXT: v_writelane_b32 v40, s30, 0
-; GFX11-NEXT: v_writelane_b32 v40, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s31, v40, 1
-; GFX11-NEXT: v_readlane_b32 s30, v40, 0
-; GFX11-NEXT: s_mov_b32 s32, s33
-; GFX11-NEXT: v_readlane_b32 s0, v40, 2
-; GFX11-NEXT: s_or_saveexec_b32 s1, -1
-; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
-; GFX11-NEXT: s_mov_b32 s33, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
- ret void
-}
-
define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -949,35 +616,6 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
}
define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v4f16_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v4f16_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1010,35 +648,6 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
}
define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_p0_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_p0_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1071,35 +680,6 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
}
define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_p1_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_p1_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1192,37 +772,6 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
}
define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2p1_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s20, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-NEXT: v_writelane_b32 v40, s20, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[20:21]
-; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s19
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v2p1_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1255,35 +804,6 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
}
define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2p5_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s18, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-NEXT: v_writelane_b32 v40, s18, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[18:19]
-; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_v2p5_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,38 +836,6 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
}
define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) #0 {
-; GFX9-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s21, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-NEXT: v_writelane_b32 v40, s21, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[22:23]
-; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s19
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: s_mov_b32 s16, s20
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1380,46 +868,6 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
}
define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 {
-; GFX9-LABEL: test_call_external_void_func_a15i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s29, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[40:41]
-; GFX9-NEXT: v_writelane_b32 v40, s29, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[40:41]
-; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s19
-; GFX9-NEXT: s_mov_b32 s2, s18
-; GFX9-NEXT: s_mov_b32 s1, s17
-; GFX9-NEXT: s_mov_b32 s0, s16
-; GFX9-NEXT: s_mov_b32 s16, s20
-; GFX9-NEXT: s_mov_b32 s17, s21
-; GFX9-NEXT: s_mov_b32 s18, s22
-; GFX9-NEXT: s_mov_b32 s19, s23
-; GFX9-NEXT: s_mov_b32 s20, s24
-; GFX9-NEXT: s_mov_b32 s21, s25
-; GFX9-NEXT: s_mov_b32 s22, s26
-; GFX9-NEXT: s_mov_b32 s23, s27
-; GFX9-NEXT: s_mov_b32 s24, s28
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_a15i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1454,47 +902,6 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; FIXME: This should also fail
define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 {
-; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s21, s33
-; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-NEXT: v_writelane_b32 v40, s21, 2
-; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[22:23]
-; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
-; GFX9-NEXT: s_mov_b32 s3, s7
-; GFX9-NEXT: s_mov_b32 s2, s6
-; GFX9-NEXT: s_mov_b32 s1, s5
-; GFX9-NEXT: s_mov_b32 s0, s4
-; GFX9-NEXT: s_mov_b32 s4, s8
-; GFX9-NEXT: s_mov_b32 s5, s9
-; GFX9-NEXT: s_mov_b32 s6, s10
-; GFX9-NEXT: s_mov_b32 s7, s11
-; GFX9-NEXT: s_mov_b32 s8, s15
-; GFX9-NEXT: s_mov_b32 s9, s16
-; GFX9-NEXT: s_mov_b32 s10, s17
-; GFX9-NEXT: s_mov_b32 s11, s18
-; GFX9-NEXT: s_mov_b32 s15, s19
-; GFX9-NEXT: s_mov_b32 s16, s20
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
-; GFX9-NEXT: v_readlane_b32 s31, v40, 1
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
-; GFX9-NEXT: s_mov_b32 s32, s33
-; GFX9-NEXT: v_readlane_b32 s4, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-NEXT: s_mov_b32 s33, s4
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1529,3 +936,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 832e43f1e197..c407f7645315 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
+; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
declare hidden void @external_void_func_i1(i1) #0
declare hidden void @external_void_func_i1_signext(i1 signext) #0
@@ -100,24 +101,24 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i1_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i1_imm:
; GFX11: ; %bb.0:
@@ -145,6 +146,25 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_i1(i1 true)
ret void
}
@@ -196,28 +216,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i1_signext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_signext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: v_bfe_i32 v0, v0, 0, 1
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i1_signext:
; GFX11: ; %bb.0:
@@ -253,6 +273,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_signext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: v_bfe_i32 v0, v0, 0, 1
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i1, ptr addrspace(1) poison
call void @external_void_func_i1_signext(i1 signext %var)
ret void
@@ -306,28 +349,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_zeroext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
; GFX11: ; %bb.0:
@@ -363,6 +406,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; HSA-NEXT: v_and_b32_e32 v0, 1, v0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_zeroext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i1, ptr addrspace(1) poison
call void @external_void_func_i1_zeroext(i1 zeroext %var)
ret void
@@ -407,24 +473,24 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i8_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
; GFX11-TRUE16: ; %bb.0:
@@ -463,6 +529,25 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_i8(i8 123)
ret void
}
@@ -513,27 +598,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i8_signext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_signext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i8_signext:
; GFX11: ; %bb.0:
@@ -567,6 +652,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_signext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i8, ptr addrspace(1) poison
call void @external_void_func_i8_signext(i8 signext %var)
ret void
@@ -617,27 +724,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_zeroext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
; GFX11: ; %bb.0:
@@ -671,6 +778,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_zeroext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i8, ptr addrspace(1) poison
call void @external_void_func_i8_zeroext(i8 zeroext %var)
ret void
@@ -715,24 +844,24 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i16_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
; GFX11-TRUE16: ; %bb.0:
@@ -771,6 +900,25 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_i16(i16 123)
ret void
}
@@ -820,27 +968,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i16_signext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_signext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i16_signext:
; GFX11: ; %bb.0:
@@ -874,6 +1022,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_signext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i16, ptr addrspace(1) poison
call void @external_void_func_i16_signext(i16 signext %var)
ret void
@@ -924,27 +1094,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_zeroext:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
; GFX11: ; %bb.0:
@@ -978,6 +1148,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_zeroext:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%var = load volatile i16, ptr addrspace(1) poison
call void @external_void_func_i16_zeroext(i16 zeroext %var)
ret void
@@ -1022,24 +1214,24 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 42
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 42
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i32_imm:
; GFX11: ; %bb.0:
@@ -1067,6 +1259,25 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 42
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_i32(i32 42)
ret void
}
@@ -1112,25 +1323,25 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_i64_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i64_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i64_imm:
; GFX11: ; %bb.0:
@@ -1159,6 +1370,26 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i64_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_i64(i64 123)
ret void
}
@@ -1208,27 +1439,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2i64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], 0
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i64:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], 0
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2i64:
; GFX11: ; %bb.0:
@@ -1262,6 +1493,31 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i64:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], 0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GISEL-NEXT: v_mov_b32_e32 v3, s3
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <2 x i64>, ptr addrspace(1) null
call void @external_void_func_v2i64(<2 x i64> %val)
ret void
@@ -1312,27 +1568,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v1, 2
-; GFX9-NEXT: v_mov_b32_e32 v2, 3
-; GFX9-NEXT: v_mov_b32_e32 v3, 4
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i64_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: v_mov_b32_e32 v1, 2
+; SDAG-NEXT: v_mov_b32_e32 v2, 3
+; SDAG-NEXT: v_mov_b32_e32 v3, 4
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
; GFX11: ; %bb.0:
@@ -1364,6 +1620,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i64_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GISEL-NEXT: v_mov_b32_e32 v2, 3
+; GISEL-NEXT: v_mov_b32_e32 v3, 4
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
ret void
}
@@ -1417,29 +1695,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3i64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], 0
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v4, 1
-; GFX9-NEXT: v_mov_b32_e32 v5, 2
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i64:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], 0
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v4, 1
+; SDAG-NEXT: v_mov_b32_e32 v5, 2
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3i64:
; GFX11: ; %bb.0:
@@ -1476,6 +1754,33 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i64:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], 0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v4, 1
+; GISEL-NEXT: v_mov_b32_e32 v5, 2
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GISEL-NEXT: v_mov_b32_e32 v3, s3
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%load = load <2 x i64>, ptr addrspace(1) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
@@ -1536,31 +1841,31 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v4i64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], 0
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v4, 1
-; GFX9-NEXT: v_mov_b32_e32 v5, 2
-; GFX9-NEXT: v_mov_b32_e32 v6, 3
-; GFX9-NEXT: v_mov_b32_e32 v7, 4
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i64:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], 0
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v4, 1
+; SDAG-NEXT: v_mov_b32_e32 v5, 2
+; SDAG-NEXT: v_mov_b32_e32 v6, 3
+; SDAG-NEXT: v_mov_b32_e32 v7, 4
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v4i64:
; GFX11: ; %bb.0:
@@ -1600,6 +1905,35 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i64:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], 0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v4, 1
+; GISEL-NEXT: v_mov_b32_e32 v5, 2
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GISEL-NEXT: v_mov_b32_e32 v3, s3
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: v_mov_b32_e32 v6, 3
+; GISEL-NEXT: v_mov_b32_e32 v7, 4
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%load = load <2 x i64>, ptr addrspace(1) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @external_void_func_v4i64(<4 x i64> %val)
@@ -1645,24 +1979,24 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_f16_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f16_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x4400
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
; GFX11-TRUE16: ; %bb.0:
@@ -1701,6 +2035,25 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x4400
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_f16(half 4.0)
ret void
}
@@ -1744,24 +2097,24 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_f32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 4.0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_f32_imm:
; GFX11: ; %bb.0:
@@ -1789,6 +2142,25 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 4.0
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_f32(float 4.0)
ret void
}
@@ -1834,25 +2206,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT: v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
; GFX11: ; %bb.0:
@@ -1881,6 +2253,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
ret void
}
@@ -1928,26 +2320,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT: v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v2, 4.0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
; GFX11: ; %bb.0:
@@ -1978,6 +2370,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v2, 4.0
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
ret void
}
@@ -2029,28 +2442,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
-; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
-; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v5f32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT: v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v2, 4.0
+; SDAG-NEXT: v_mov_b32_e32 v3, -1.0
+; SDAG-NEXT: v_mov_b32_e32 v4, 0.5
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
; GFX11: ; %bb.0:
@@ -2084,6 +2497,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v5f32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT: v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v2, 4.0
+; GISEL-NEXT: v_mov_b32_e32 v3, -1.0
+; GISEL-NEXT: v_mov_b32_e32 v4, 0.5
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
ret void
}
@@ -2129,25 +2565,25 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_f64_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f64_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, 0x40100000
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_f64_imm:
; GFX11: ; %bb.0:
@@ -2176,6 +2612,26 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f64_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x40100000
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_f64(double 4.0)
ret void
}
@@ -2225,27 +2681,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f64_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
; GFX11: ; %bb.0:
@@ -2277,6 +2733,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f64_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
ret void
}
@@ -2330,29 +2808,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f64_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000
+; SDAG-NEXT: v_mov_b32_e32 v4, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, 0x40200000
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
; GFX11: ; %bb.0:
@@ -2387,6 +2865,30 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f64_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GISEL-NEXT: v_mov_b32_e32 v5, 0x40200000
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
ret void
}
@@ -2436,26 +2938,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2i16:
; GFX11: ; %bb.0:
@@ -2487,6 +2989,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s8
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <2 x i16>, ptr addrspace(1) poison
call void @external_void_func_v2i16(<2 x i16> %val)
ret void
@@ -2539,26 +3062,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3i16:
; GFX11: ; %bb.0:
@@ -2590,6 +3113,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <3 x i16>, ptr addrspace(1) poison
call void @external_void_func_v3i16(<3 x i16> %val)
ret void
@@ -2643,26 +3188,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3f16:
; GFX11: ; %bb.0:
@@ -2694,6 +3239,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <3 x half>, ptr addrspace(1) poison
call void @external_void_func_v3f16(<3 x half> %val)
ret void
@@ -2741,25 +3308,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
-; GFX9-NEXT: v_mov_b32_e32 v1, 3
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i16_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001
+; SDAG-NEXT: v_mov_b32_e32 v1, 3
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
; GFX11: ; %bb.0:
@@ -2788,6 +3355,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001
+; GISEL-NEXT: v_mov_b32_e32 v1, 3
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
ret void
}
@@ -2834,25 +3421,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f16_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x40003c00
+; SDAG-NEXT: v_mov_b32_e32 v1, 0x4400
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
; GFX11: ; %bb.0:
@@ -2882,6 +3469,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x40003c00
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x4400
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
ret void
}
@@ -2934,26 +3541,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v4i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v4i16:
; GFX11: ; %bb.0:
@@ -2985,6 +3592,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <4 x i16>, ptr addrspace(1) poison
call void @external_void_func_v4i16(<4 x i16> %val)
ret void
@@ -3033,25 +3662,25 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i16_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001
+; SDAG-NEXT: v_mov_b32_e32 v1, 0x40003
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
; GFX11: ; %bb.0:
@@ -3081,6 +3710,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x40003
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
ret void
}
@@ -3132,26 +3781,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2f16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2f16:
; GFX11: ; %bb.0:
@@ -3183,6 +3832,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s8
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <2 x half>, ptr addrspace(1) poison
call void @external_void_func_v2f16(<2 x half> %val)
ret void
@@ -3231,26 +3901,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2i32:
; GFX11: ; %bb.0:
@@ -3282,6 +3952,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <2 x i32>, ptr addrspace(1) poison
call void @external_void_func_v2i32(<2 x i32> %val)
ret void
@@ -3328,25 +4020,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v1, 2
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: v_mov_b32_e32 v1, 2
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
; GFX11: ; %bb.0:
@@ -3375,6 +4067,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
ret void
}
@@ -3422,26 +4134,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: v_mov_b32_e32 v1, 4
-; GFX9-NEXT: v_mov_b32_e32 v2, 5
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v1, 4
+; SDAG-NEXT: v_mov_b32_e32 v2, 5
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
; GFX11: ; %bb.0:
@@ -3472,6 +4184,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GISEL-NEXT: v_mov_b32_e32 v1, 4
+; GISEL-NEXT: v_mov_b32_e32 v2, 5
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
ret void
}
@@ -3521,27 +4254,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: v_mov_b32_e32 v1, 4
-; GFX9-NEXT: v_mov_b32_e32 v2, 5
-; GFX9-NEXT: v_mov_b32_e32 v3, 6
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i32_i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v1, 4
+; SDAG-NEXT: v_mov_b32_e32 v2, 5
+; SDAG-NEXT: v_mov_b32_e32 v3, 6
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
; GFX11: ; %bb.0:
@@ -3573,6 +4306,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i32_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GISEL-NEXT: v_mov_b32_e32 v1, 4
+; GISEL-NEXT: v_mov_b32_e32 v2, 5
+; GISEL-NEXT: v_mov_b32_e32 v3, 6
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
ret void
}
@@ -3620,26 +4375,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v4i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v4i32:
; GFX11: ; %bb.0:
@@ -3671,6 +4426,30 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GISEL-NEXT: v_mov_b32_e32 v3, s3
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = load <4 x i32>, ptr addrspace(1) poison
call void @external_void_func_v4i32(<4 x i32> %val)
ret void
@@ -3721,27 +4500,27 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v1, 2
-; GFX9-NEXT: v_mov_b32_e32 v2, 3
-; GFX9-NEXT: v_mov_b32_e32 v3, 4
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: v_mov_b32_e32 v1, 2
+; SDAG-NEXT: v_mov_b32_e32 v2, 3
+; SDAG-NEXT: v_mov_b32_e32 v3, 4
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
; GFX11: ; %bb.0:
@@ -3773,6 +4552,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GISEL-NEXT: v_mov_b32_e32 v2, 3
+; GISEL-NEXT: v_mov_b32_e32 v3, 4
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
ret void
}
@@ -3824,28 +4625,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v1, 2
-; GFX9-NEXT: v_mov_b32_e32 v2, 3
-; GFX9-NEXT: v_mov_b32_e32 v3, 4
-; GFX9-NEXT: v_mov_b32_e32 v4, 5
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v5i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: v_mov_b32_e32 v1, 2
+; SDAG-NEXT: v_mov_b32_e32 v2, 3
+; SDAG-NEXT: v_mov_b32_e32 v3, 4
+; SDAG-NEXT: v_mov_b32_e32 v4, 5
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
; GFX11: ; %bb.0:
@@ -3879,6 +4680,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v5i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GISEL-NEXT: v_mov_b32_e32 v2, 3
+; GISEL-NEXT: v_mov_b32_e32 v3, 4
+; GISEL-NEXT: v_mov_b32_e32 v4, 5
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
ret void
}
@@ -3932,29 +4756,29 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v8i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v8i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v8i32:
; GFX11: ; %bb.0:
@@ -3993,6 +4817,36 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v8i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s8
+; GISEL-NEXT: v_mov_b32_e32 v1, s9
+; GISEL-NEXT: v_mov_b32_e32 v2, s10
+; GISEL-NEXT: v_mov_b32_e32 v3, s11
+; GISEL-NEXT: v_mov_b32_e32 v4, s12
+; GISEL-NEXT: v_mov_b32_e32 v5, s13
+; GISEL-NEXT: v_mov_b32_e32 v6, s14
+; GISEL-NEXT: v_mov_b32_e32 v7, s15
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
%val = load <8 x i32>, ptr addrspace(1) %ptr
call void @external_void_func_v8i32(<8 x i32> %val)
@@ -4052,31 +4906,31 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
-; GFX9-NEXT: v_mov_b32_e32 v1, 2
-; GFX9-NEXT: v_mov_b32_e32 v2, 3
-; GFX9-NEXT: v_mov_b32_e32 v3, 4
-; GFX9-NEXT: v_mov_b32_e32 v4, 5
-; GFX9-NEXT: v_mov_b32_e32 v5, 6
-; GFX9-NEXT: v_mov_b32_e32 v6, 7
-; GFX9-NEXT: v_mov_b32_e32 v7, 8
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v8i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: v_mov_b32_e32 v1, 2
+; SDAG-NEXT: v_mov_b32_e32 v2, 3
+; SDAG-NEXT: v_mov_b32_e32 v3, 4
+; SDAG-NEXT: v_mov_b32_e32 v4, 5
+; SDAG-NEXT: v_mov_b32_e32 v5, 6
+; SDAG-NEXT: v_mov_b32_e32 v6, 7
+; SDAG-NEXT: v_mov_b32_e32 v7, 8
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
; GFX11: ; %bb.0:
@@ -4114,6 +4968,32 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v8i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GISEL-NEXT: v_mov_b32_e32 v2, 3
+; GISEL-NEXT: v_mov_b32_e32 v3, 4
+; GISEL-NEXT: v_mov_b32_e32 v4, 5
+; GISEL-NEXT: v_mov_b32_e32 v5, 6
+; GISEL-NEXT: v_mov_b32_e32 v6, 7
+; GISEL-NEXT: v_mov_b32_e32 v7, 8
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
ret void
}
@@ -4171,31 +5051,31 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v16i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
-; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v16i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
+; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v16i32:
; GFX11: ; %bb.0:
@@ -4238,6 +5118,44 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v16i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s8
+; GISEL-NEXT: v_mov_b32_e32 v1, s9
+; GISEL-NEXT: v_mov_b32_e32 v2, s10
+; GISEL-NEXT: v_mov_b32_e32 v3, s11
+; GISEL-NEXT: v_mov_b32_e32 v4, s12
+; GISEL-NEXT: v_mov_b32_e32 v5, s13
+; GISEL-NEXT: v_mov_b32_e32 v6, s14
+; GISEL-NEXT: v_mov_b32_e32 v7, s15
+; GISEL-NEXT: v_mov_b32_e32 v8, s16
+; GISEL-NEXT: v_mov_b32_e32 v9, s17
+; GISEL-NEXT: v_mov_b32_e32 v10, s18
+; GISEL-NEXT: v_mov_b32_e32 v11, s19
+; GISEL-NEXT: v_mov_b32_e32 v12, s20
+; GISEL-NEXT: v_mov_b32_e32 v13, s21
+; GISEL-NEXT: v_mov_b32_e32 v14, s22
+; GISEL-NEXT: v_mov_b32_e32 v15, s23
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
%val = load <16 x i32>, ptr addrspace(1) %ptr
call void @external_void_func_v16i32(<16 x i32> %val)
@@ -4309,37 +5227,37 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v32i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s7, 0xf000
-; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v32i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SDAG-NEXT: s_mov_b32 s6, -1
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_getpc_b64 s[8:9]
+; SDAG-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_waitcnt vmcnt(6)
+; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v32i32:
; GFX11: ; %bb.0:
@@ -4394,6 +5312,62 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v32i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s54, -1
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40
+; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s55, 0xe00000
+; GISEL-NEXT: s_add_u32 s52, s52, s3
+; GISEL-NEXT: s_addc_u32 s53, s53, 0
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s23
+; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT: v_mov_b32_e32 v0, s36
+; GISEL-NEXT: v_mov_b32_e32 v1, s37
+; GISEL-NEXT: v_mov_b32_e32 v2, s38
+; GISEL-NEXT: v_mov_b32_e32 v3, s39
+; GISEL-NEXT: v_mov_b32_e32 v4, s40
+; GISEL-NEXT: v_mov_b32_e32 v5, s41
+; GISEL-NEXT: v_mov_b32_e32 v6, s42
+; GISEL-NEXT: v_mov_b32_e32 v7, s43
+; GISEL-NEXT: v_mov_b32_e32 v8, s44
+; GISEL-NEXT: v_mov_b32_e32 v9, s45
+; GISEL-NEXT: v_mov_b32_e32 v10, s46
+; GISEL-NEXT: v_mov_b32_e32 v11, s47
+; GISEL-NEXT: v_mov_b32_e32 v12, s48
+; GISEL-NEXT: v_mov_b32_e32 v13, s49
+; GISEL-NEXT: v_mov_b32_e32 v14, s50
+; GISEL-NEXT: v_mov_b32_e32 v15, s51
+; GISEL-NEXT: v_mov_b32_e32 v16, s8
+; GISEL-NEXT: v_mov_b32_e32 v17, s9
+; GISEL-NEXT: v_mov_b32_e32 v18, s10
+; GISEL-NEXT: v_mov_b32_e32 v19, s11
+; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT: v_mov_b32_e32 v20, s12
+; GISEL-NEXT: v_mov_b32_e32 v21, s13
+; GISEL-NEXT: v_mov_b32_e32 v22, s14
+; GISEL-NEXT: v_mov_b32_e32 v23, s15
+; GISEL-NEXT: v_mov_b32_e32 v24, s16
+; GISEL-NEXT: v_mov_b32_e32 v25, s17
+; GISEL-NEXT: v_mov_b32_e32 v26, s18
+; GISEL-NEXT: v_mov_b32_e32 v27, s19
+; GISEL-NEXT: v_mov_b32_e32 v28, s20
+; GISEL-NEXT: v_mov_b32_e32 v29, s21
+; GISEL-NEXT: v_mov_b32_e32 v30, s22
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
%val = load <32 x i32>, ptr addrspace(1) %ptr
call void @external_void_func_v32i32(<32 x i32> %val)
@@ -4471,40 +5445,40 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s7, 0xf000
-; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0
-; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
-; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v32i32_i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SDAG-NEXT: s_mov_b32 s6, -1
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dword v32, off, s[4:7], 0
+; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_waitcnt vmcnt(8)
+; SDAG-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
+; SDAG-NEXT: s_waitcnt vmcnt(8)
+; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
; GFX11: ; %bb.0:
@@ -4566,6 +5540,67 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v32i32_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s54, -1
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40
+; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0
+; GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s55, 0xe00000
+; GISEL-NEXT: s_add_u32 s52, s52, s5
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_addc_u32 s53, s53, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1
+; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; GISEL-NEXT: v_mov_b32_e32 v0, s23
+; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT: v_mov_b32_e32 v0, s36
+; GISEL-NEXT: v_mov_b32_e32 v1, s37
+; GISEL-NEXT: v_mov_b32_e32 v2, s38
+; GISEL-NEXT: v_mov_b32_e32 v3, s39
+; GISEL-NEXT: v_mov_b32_e32 v4, s40
+; GISEL-NEXT: v_mov_b32_e32 v5, s41
+; GISEL-NEXT: v_mov_b32_e32 v6, s42
+; GISEL-NEXT: v_mov_b32_e32 v7, s43
+; GISEL-NEXT: v_mov_b32_e32 v8, s44
+; GISEL-NEXT: v_mov_b32_e32 v9, s45
+; GISEL-NEXT: v_mov_b32_e32 v10, s46
+; GISEL-NEXT: v_mov_b32_e32 v11, s47
+; GISEL-NEXT: v_mov_b32_e32 v12, s48
+; GISEL-NEXT: v_mov_b32_e32 v13, s49
+; GISEL-NEXT: v_mov_b32_e32 v14, s50
+; GISEL-NEXT: v_mov_b32_e32 v15, s51
+; GISEL-NEXT: v_mov_b32_e32 v16, s8
+; GISEL-NEXT: v_mov_b32_e32 v17, s9
+; GISEL-NEXT: v_mov_b32_e32 v18, s10
+; GISEL-NEXT: v_mov_b32_e32 v19, s11
+; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT: v_mov_b32_e32 v20, s12
+; GISEL-NEXT: v_mov_b32_e32 v21, s13
+; GISEL-NEXT: v_mov_b32_e32 v22, s14
+; GISEL-NEXT: v_mov_b32_e32 v23, s15
+; GISEL-NEXT: v_mov_b32_e32 v24, s16
+; GISEL-NEXT: v_mov_b32_e32 v25, s17
+; GISEL-NEXT: v_mov_b32_e32 v26, s18
+; GISEL-NEXT: v_mov_b32_e32 v27, s19
+; GISEL-NEXT: v_mov_b32_e32 v28, s20
+; GISEL-NEXT: v_mov_b32_e32 v29, s21
+; GISEL-NEXT: v_mov_b32_e32 v30, s22
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
%val0 = load <32 x i32>, ptr addrspace(1) %ptr0
%val1 = load i32, ptr addrspace(1) poison
@@ -4622,29 +5657,29 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_i32_func_i32_imm:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: v_mov_b32_e32 v0, 42
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_mov_b32 s39, 0xf000
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_i32_func_i32_imm:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s50, -1
+; SDAG-NEXT: s_mov_b32 s51, 0xe00000
+; SDAG-NEXT: s_add_u32 s48, s48, s5
+; SDAG-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
+; SDAG-NEXT: s_addc_u32 s49, s49, 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
+; SDAG-NEXT: v_mov_b32_e32 v0, 42
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 s39, 0xf000
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_i32_func_i32_imm:
; GFX11: ; %bb.0:
@@ -4682,6 +5717,30 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_i32_func_i32_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s50, -1
+; GISEL-NEXT: s_mov_b32 s51, 0xe00000
+; GISEL-NEXT: s_add_u32 s48, s48, s5
+; GISEL-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
+; GISEL-NEXT: s_addc_u32 s49, s49, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 42
+; GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xf000
+; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call i32 @external_i32_func_i32(i32 42)
store volatile i32 %val, ptr addrspace(1) %out
ret void
@@ -4736,29 +5795,29 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_struct_i8_i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
+; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX11: ; %bb.0:
@@ -4797,6 +5856,30 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; HSA-NEXT: s_mov_b32 s32, 0
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_struct_i8_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[8:9]
+; GISEL-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GISEL-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT: s_endpgm
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
%val = load { i8, i32 }, ptr addrspace(1) %ptr0
call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -4860,34 +5943,34 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_movk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_byval_struct_i8_i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: v_mov_b32_e32 v0, 3
+; SDAG-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; SDAG-NEXT: v_mov_b32_e32 v0, 8
+; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_movk_i32 s32, 0x400
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-TRUE16-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX11-TRUE16: ; %bb.0:
@@ -4948,6 +6031,35 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_byval_struct_i8_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; GISEL-NEXT: v_mov_b32_e32 v0, 8
+; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_movk_i32 s32, 0x400
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_waitcnt vmcnt(1)
+; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32
+; GISEL-NEXT: s_waitcnt vmcnt(1)
+; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%val = alloca { i8, i32 }, align 8, addrspace(5)
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
@@ -5034,44 +6146,44 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
-; GFX9-NEXT: s_movk_i32 s32, 0x800
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
-; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s5
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: v_mov_b32_e32 v0, 3
+; SDAG-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; SDAG-NEXT: v_mov_b32_e32 v0, 8
+; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; SDAG-NEXT: s_movk_i32 s32, 0x800
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; SDAG-NEXT: v_mov_b32_e32 v0, 8
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
;
; GFX11-TRUE16-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX11-TRUE16: ; %bb.0:
@@ -5170,6 +6282,45 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s5
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; GISEL-NEXT: v_mov_b32_e32 v0, 8
+; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4
+; GISEL-NEXT: s_movk_i32 s32, 0x800
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_waitcnt vmcnt(1)
+; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32
+; GISEL-NEXT: s_waitcnt vmcnt(1)
+; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4
+; GISEL-NEXT: v_mov_b32_e32 v0, 8
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
+; GISEL-NEXT: s_mov_b32 s2, -1
+; GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GISEL-NEXT: s_waitcnt vmcnt(1)
+; GISEL-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%in.val = alloca { i8, i32 }, align 8, addrspace(5)
%out.val = alloca { i8, i32 }, align 8, addrspace(5)
%in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
@@ -5272,47 +6423,47 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: test_call_external_void_func_v16i8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
-; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
-; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
-; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
-; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3
-; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3
-; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3
-; GFX9-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-NEXT: v_mov_b32_e32 v8, v2
-; GFX9-NEXT: v_mov_b32_e32 v12, v3
-; GFX9-NEXT: v_mov_b32_e32 v1, v16
-; GFX9-NEXT: v_mov_b32_e32 v2, v17
-; GFX9-NEXT: v_mov_b32_e32 v3, v18
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v16i8:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s38, -1
+; SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; SDAG-NEXT: s_add_u32 s36, s36, s3
+; SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SDAG-NEXT: s_mov_b32 s2, -1
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT: s_addc_u32 s37, s37, 0
+; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: v_lshrrev_b32_e32 v16, 8, v0
+; SDAG-NEXT: v_lshrrev_b32_e32 v17, 16, v0
+; SDAG-NEXT: v_lshrrev_b32_e32 v18, 24, v0
+; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2
+; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; SDAG-NEXT: v_lshrrev_b32_e32 v11, 24, v2
+; SDAG-NEXT: v_lshrrev_b32_e32 v13, 8, v3
+; SDAG-NEXT: v_lshrrev_b32_e32 v14, 16, v3
+; SDAG-NEXT: v_lshrrev_b32_e32 v15, 24, v3
+; SDAG-NEXT: v_mov_b32_e32 v4, v1
+; SDAG-NEXT: v_mov_b32_e32 v8, v2
+; SDAG-NEXT: v_mov_b32_e32 v12, v3
+; SDAG-NEXT: v_mov_b32_e32 v1, v16
+; SDAG-NEXT: v_mov_b32_e32 v2, v17
+; SDAG-NEXT: v_mov_b32_e32 v3, v18
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_v16i8:
; GFX11: ; %bb.0:
@@ -5384,6 +6535,56 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; HSA-NEXT: v_mov_b32_e32 v3, v18
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v16i8:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s38, -1
+; GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GISEL-NEXT: s_add_u32 s36, s36, s3
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_lshr_b32 s8, s0, 8
+; GISEL-NEXT: s_lshr_b32 s9, s0, 16
+; GISEL-NEXT: s_lshr_b32 s10, s0, 24
+; GISEL-NEXT: s_lshr_b32 s11, s1, 8
+; GISEL-NEXT: s_lshr_b32 s12, s1, 16
+; GISEL-NEXT: s_lshr_b32 s13, s1, 24
+; GISEL-NEXT: s_lshr_b32 s14, s2, 8
+; GISEL-NEXT: s_lshr_b32 s15, s2, 16
+; GISEL-NEXT: s_lshr_b32 s16, s2, 24
+; GISEL-NEXT: s_lshr_b32 s17, s3, 8
+; GISEL-NEXT: s_lshr_b32 s18, s3, 16
+; GISEL-NEXT: s_lshr_b32 s19, s3, 24
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v4, s1
+; GISEL-NEXT: v_mov_b32_e32 v8, s2
+; GISEL-NEXT: v_mov_b32_e32 v12, s3
+; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v1, s8
+; GISEL-NEXT: v_mov_b32_e32 v2, s9
+; GISEL-NEXT: v_mov_b32_e32 v3, s10
+; GISEL-NEXT: v_mov_b32_e32 v5, s11
+; GISEL-NEXT: v_mov_b32_e32 v6, s12
+; GISEL-NEXT: v_mov_b32_e32 v7, s13
+; GISEL-NEXT: v_mov_b32_e32 v9, s14
+; GISEL-NEXT: v_mov_b32_e32 v10, s15
+; GISEL-NEXT: v_mov_b32_e32 v11, s16
+; GISEL-NEXT: v_mov_b32_e32 v13, s17
+; GISEL-NEXT: v_mov_b32_e32 v14, s18
+; GISEL-NEXT: v_mov_b32_e32 v15, s19
+; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
%val = load <16 x i8>, ptr addrspace(1) %ptr
call void @external_void_func_v16i8(<16 x i8> %val)
@@ -5509,64 +6710,64 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
-; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s54, -1
-; GFX9-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-NEXT: s_add_u32 s52, s52, s5
-; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
-; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
-; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_addc_u32 s53, s53, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s23
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32
-; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
-; GFX9-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55]
-; GFX9-NEXT: v_mov_b32_e32 v0, s36
-; GFX9-NEXT: v_mov_b32_e32 v1, s37
-; GFX9-NEXT: v_mov_b32_e32 v2, s38
-; GFX9-NEXT: v_mov_b32_e32 v3, s39
-; GFX9-NEXT: v_mov_b32_e32 v4, s40
-; GFX9-NEXT: v_mov_b32_e32 v5, s41
-; GFX9-NEXT: v_mov_b32_e32 v6, s42
-; GFX9-NEXT: v_mov_b32_e32 v7, s43
-; GFX9-NEXT: v_mov_b32_e32 v8, s44
-; GFX9-NEXT: v_mov_b32_e32 v9, s45
-; GFX9-NEXT: v_mov_b32_e32 v10, s46
-; GFX9-NEXT: v_mov_b32_e32 v11, s47
-; GFX9-NEXT: v_mov_b32_e32 v12, s48
-; GFX9-NEXT: v_mov_b32_e32 v13, s49
-; GFX9-NEXT: v_mov_b32_e32 v14, s50
-; GFX9-NEXT: v_mov_b32_e32 v15, s51
-; GFX9-NEXT: v_mov_b32_e32 v16, s8
-; GFX9-NEXT: v_mov_b32_e32 v17, s9
-; GFX9-NEXT: v_mov_b32_e32 v18, s10
-; GFX9-NEXT: v_mov_b32_e32 v19, s11
-; GFX9-NEXT: v_mov_b32_e32 v20, s12
-; GFX9-NEXT: v_mov_b32_e32 v21, s13
-; GFX9-NEXT: v_mov_b32_e32 v22, s14
-; GFX9-NEXT: v_mov_b32_e32 v23, s15
-; GFX9-NEXT: v_mov_b32_e32 v24, s16
-; GFX9-NEXT: v_mov_b32_e32 v25, s17
-; GFX9-NEXT: v_mov_b32_e32 v26, s18
-; GFX9-NEXT: v_mov_b32_e32 v27, s19
-; GFX9-NEXT: v_mov_b32_e32 v28, s20
-; GFX9-NEXT: v_mov_b32_e32 v29, s21
-; GFX9-NEXT: v_mov_b32_e32 v30, s22
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: s_endpgm
+; SDAG-LABEL: stack_passed_arg_alignment_v32i32_f64:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT: s_mov_b32 s54, -1
+; SDAG-NEXT: s_mov_b32 s55, 0xe00000
+; SDAG-NEXT: s_add_u32 s52, s52, s5
+; SDAG-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
+; SDAG-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
+; SDAG-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_addc_u32 s53, s53, 0
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v0, s23
+; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; SDAG-NEXT: v_mov_b32_e32 v0, s4
+; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; SDAG-NEXT: v_mov_b32_e32 v0, s5
+; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT: s_mov_b64 s[0:1], s[52:53]
+; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[2:3], s[54:55]
+; SDAG-NEXT: v_mov_b32_e32 v0, s36
+; SDAG-NEXT: v_mov_b32_e32 v1, s37
+; SDAG-NEXT: v_mov_b32_e32 v2, s38
+; SDAG-NEXT: v_mov_b32_e32 v3, s39
+; SDAG-NEXT: v_mov_b32_e32 v4, s40
+; SDAG-NEXT: v_mov_b32_e32 v5, s41
+; SDAG-NEXT: v_mov_b32_e32 v6, s42
+; SDAG-NEXT: v_mov_b32_e32 v7, s43
+; SDAG-NEXT: v_mov_b32_e32 v8, s44
+; SDAG-NEXT: v_mov_b32_e32 v9, s45
+; SDAG-NEXT: v_mov_b32_e32 v10, s46
+; SDAG-NEXT: v_mov_b32_e32 v11, s47
+; SDAG-NEXT: v_mov_b32_e32 v12, s48
+; SDAG-NEXT: v_mov_b32_e32 v13, s49
+; SDAG-NEXT: v_mov_b32_e32 v14, s50
+; SDAG-NEXT: v_mov_b32_e32 v15, s51
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: v_mov_b32_e32 v20, s12
+; SDAG-NEXT: v_mov_b32_e32 v21, s13
+; SDAG-NEXT: v_mov_b32_e32 v22, s14
+; SDAG-NEXT: v_mov_b32_e32 v23, s15
+; SDAG-NEXT: v_mov_b32_e32 v24, s16
+; SDAG-NEXT: v_mov_b32_e32 v25, s17
+; SDAG-NEXT: v_mov_b32_e32 v26, s18
+; SDAG-NEXT: v_mov_b32_e32 v27, s19
+; SDAG-NEXT: v_mov_b32_e32 v28, s20
+; SDAG-NEXT: v_mov_b32_e32 v29, s21
+; SDAG-NEXT: v_mov_b32_e32 v30, s22
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX11: ; %bb.0: ; %entry
@@ -5662,6 +6863,65 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; HSA-NEXT: v_mov_b32_e32 v30, s22
; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25]
; HSA-NEXT: s_endpgm
+;
+; GISEL-LABEL: stack_passed_arg_alignment_v32i32_f64:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT: s_mov_b32 s54, -1
+; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
+; GISEL-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xa4
+; GISEL-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
+; GISEL-NEXT: s_mov_b32 s55, 0xe00000
+; GISEL-NEXT: s_add_u32 s52, s52, s5
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_addc_u32 s53, s53, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v0, s23
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; GISEL-NEXT: v_mov_b32_e32 v0, s1
+; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, s36
+; GISEL-NEXT: v_mov_b32_e32 v1, s37
+; GISEL-NEXT: v_mov_b32_e32 v2, s38
+; GISEL-NEXT: v_mov_b32_e32 v3, s39
+; GISEL-NEXT: v_mov_b32_e32 v4, s40
+; GISEL-NEXT: v_mov_b32_e32 v5, s41
+; GISEL-NEXT: v_mov_b32_e32 v6, s42
+; GISEL-NEXT: v_mov_b32_e32 v7, s43
+; GISEL-NEXT: v_mov_b32_e32 v8, s44
+; GISEL-NEXT: v_mov_b32_e32 v9, s45
+; GISEL-NEXT: v_mov_b32_e32 v10, s46
+; GISEL-NEXT: v_mov_b32_e32 v11, s47
+; GISEL-NEXT: v_mov_b32_e32 v12, s48
+; GISEL-NEXT: v_mov_b32_e32 v13, s49
+; GISEL-NEXT: v_mov_b32_e32 v14, s50
+; GISEL-NEXT: v_mov_b32_e32 v15, s51
+; GISEL-NEXT: v_mov_b32_e32 v16, s8
+; GISEL-NEXT: v_mov_b32_e32 v17, s9
+; GISEL-NEXT: v_mov_b32_e32 v18, s10
+; GISEL-NEXT: v_mov_b32_e32 v19, s11
+; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT: v_mov_b32_e32 v20, s12
+; GISEL-NEXT: v_mov_b32_e32 v21, s13
+; GISEL-NEXT: v_mov_b32_e32 v22, s14
+; GISEL-NEXT: v_mov_b32_e32 v23, s15
+; GISEL-NEXT: v_mov_b32_e32 v24, s16
+; GISEL-NEXT: v_mov_b32_e32 v25, s17
+; GISEL-NEXT: v_mov_b32_e32 v26, s18
+; GISEL-NEXT: v_mov_b32_e32 v27, s19
+; GISEL-NEXT: v_mov_b32_e32 v28, s20
+; GISEL-NEXT: v_mov_b32_e32 v29, s21
+; GISEL-NEXT: v_mov_b32_e32 v30, s22
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: s_endpgm
entry:
call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
ret void
@@ -5702,22 +6962,22 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
; CI-NEXT: s_setpc_b64 s[4:5]
;
-; GFX9-LABEL: tail_call_byval_align16:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; SDAG-LABEL: tail_call_byval_align16:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32
+; SDAG-NEXT: s_getpc_b64 s[4:5]
+; SDAG-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
+; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; SDAG-NEXT: s_waitcnt vmcnt(2)
+; SDAG-NEXT: buffer_store_dword v32, off, s[0:3], s32
+; SDAG-NEXT: s_waitcnt vmcnt(1)
+; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
+; SDAG-NEXT: s_setpc_b64 s[4:5]
;
; GFX11-LABEL: tail_call_byval_align16:
; GFX11: ; %bb.0: ; %entry
@@ -5749,6 +7009,23 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; HSA-NEXT: s_waitcnt vmcnt(1)
; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
; HSA-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: tail_call_byval_align16:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GISEL-NEXT: s_getpc_b64 s[4:5]
+; GISEL-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32
+; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
+; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16
+; GISEL-NEXT: s_setpc_b64 s[4:5]
entry:
%alloca = alloca double, align 8, addrspace(5)
tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
diff --git a/llvm/test/CodeGen/AMDGPU/call-c-function.ll b/llvm/test/CodeGen/AMDGPU/call-c-function.ll
index e1bb3eab25ef..4fbc7271ba0c 100644
--- a/llvm/test/CodeGen/AMDGPU/call-c-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-c-function.ll
@@ -1,21 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=SDAG -enable-var-scope %s
+; RUN: llc -global-isel=1 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GISEL -enable-var-scope %s
; Test that we don't explode on calls from shaders to functions with the C calling convention.
define amdgpu_ps void @amdgpu_ps_call_default_cc() {
- ; CHECK-LABEL: name: amdgpu_ps_call_default_cc
- ; CHECK: bb.0.main_body:
- ; CHECK-NEXT: S_ENDPGM 0
+ ; SDAG-LABEL: name: amdgpu_ps_call_default_cc
+ ; SDAG: bb.0.main_body:
+ ; SDAG-NEXT: S_ENDPGM 0
+ ;
+ ; GISEL-LABEL: name: amdgpu_ps_call_default_cc
+ ; GISEL: bb.1.main_body:
+ ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+ ; GISEL-NEXT: $sgpr4_sgpr5 = COPY [[DEF]]
+ ; GISEL-NEXT: $sgpr6_sgpr7 = COPY [[DEF]]
+ ; GISEL-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; GISEL-NEXT: $sgpr8_sgpr9 = COPY [[S_MOV_B]]
+ ; GISEL-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]]
+ ; GISEL-NEXT: $sgpr12 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr13 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr14 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr15 = COPY [[DEF2]]
+ ; GISEL-NEXT: $vgpr31 = COPY [[DEF2]]
+ ; GISEL-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: S_ENDPGM 0
main_body:
call void null()
ret void
}
define amdgpu_gfx void @amdgpu_gfx_call_default_cc() {
- ; CHECK-LABEL: name: amdgpu_gfx_call_default_cc
- ; CHECK: bb.0.main_body:
- ; CHECK-NEXT: SI_RETURN
+ ; SDAG-LABEL: name: amdgpu_gfx_call_default_cc
+ ; SDAG: bb.0.main_body:
+ ; SDAG-NEXT: SI_RETURN
+ ;
+ ; GISEL-LABEL: name: amdgpu_gfx_call_default_cc
+ ; GISEL: bb.1.main_body:
+ ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+ ; GISEL-NEXT: $sgpr4_sgpr5 = COPY [[DEF]]
+ ; GISEL-NEXT: $sgpr6_sgpr7 = COPY [[DEF]]
+ ; GISEL-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; GISEL-NEXT: $sgpr8_sgpr9 = COPY [[S_MOV_B]]
+ ; GISEL-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]]
+ ; GISEL-NEXT: $sgpr12 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr13 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr14 = COPY [[DEF2]]
+ ; GISEL-NEXT: $sgpr15 = COPY [[DEF2]]
+ ; GISEL-NEXT: $vgpr31 = COPY [[DEF2]]
+ ; GISEL-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: SI_RETURN
main_body:
call void null()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index 5f324df30f7e..fe0b0188d2d3 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -1,84 +1,341 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s
-; GCN-LABEL: {{^}}test_bitcast_return_type_noinline:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
-; GCN: s_swappc_b64
define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
+; SDAG-LABEL: test_bitcast_return_type_noinline:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_bitcast_return_type_noinline:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call float @ret_i32_noinline()
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
-; GCN: s_swappc_b64
define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
+; SDAG-LABEL: test_bitcast_return_type_alwaysinline:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_bitcast_return_type_alwaysinline:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call float @ret_i32_alwaysinline()
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}test_bitcast_argument_type:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
define amdgpu_kernel void @test_bitcast_argument_type() #0 {
+; SDAG-LABEL: test_bitcast_argument_type:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_bitcast_argument_type:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT: v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call i32 @ident_i32(float 2.0)
%op = add i32 %val, 1
store volatile i32 %op, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}test_bitcast_argument_and_return_types:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
+; SDAG-LABEL: test_bitcast_argument_and_return_types:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_bitcast_argument_and_return_types:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT: v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call float @ident_i32(float 2.0)
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN-NEXT: v_and_b32_e32 [[TMP:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: v_add_i32_e32 v0, vcc, [[TMP]], v0
-; GCN-NEXT: s_setpc_b64
define hidden i32 @use_workitem_id_x(i32 %arg0) #3 {
+; GCN-LABEL: use_workitem_id_x:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 0x3ff, v31
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
%id = call i32 @llvm.amdgcn.workitem.id.x()
%op = add i32 %id, %arg0
ret i32 %op
}
-; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+12
-; GCN: v_mov_b32_e32 v0, 9
-; GCN: s_swappc_b64
-; GCN: v_add_f32_e32
define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #3 {
+; SDAG-LABEL: test_bitcast_use_workitem_id_x:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: v_mov_b32_e32 v31, v0
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12
+; SDAG-NEXT: v_mov_b32_e32 v0, 9
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_bitcast_use_workitem_id_x:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: v_mov_b32_e32 v31, v0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 9
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = call float @use_workitem_id_x(i32 9)
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) poison
ret void
}
-; GCN-LABEL: {{^}}test_invoke:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
@_ZTIi = external global ptr
declare i32 @__gxx_personality_v0(...)
define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v0 {
+; SDAG-LABEL: test_invoke:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT: flat_store_dword v[0:1], v0
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_invoke:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT: v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT: flat_store_dword v[0:1], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_endpgm
%val = invoke float @ident_i32(float 2.0)
to label %continue unwind label %broken
@@ -96,14 +353,28 @@ continue:
; arguments before we lower any calls to them.
define hidden i32 @ret_i32_noinline() #0 {
+; GCN-LABEL: ret_i32_noinline:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 4
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret i32 4
}
define hidden i32 @ret_i32_alwaysinline() #1 {
+; GCN-LABEL: ret_i32_alwaysinline:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, 4
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret i32 4
}
define hidden i32 @ident_i32(i32 %i) #0 {
+; GCN-LABEL: ident_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret i32 %i
}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index ffe536d347c5..4b5a49fc0c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=GISEL %s
; Check that call / asm get an implicit-def $mode added to them in
; strictfp functions.
@@ -7,46 +8,80 @@
declare protected void @maybe_defs_mode() #0
define float @call_changes_mode(float %x, float %y) #0 {
- ; CHECK-LABEL: name: call_changes_mode
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ ; SDAG-LABEL: name: call_changes_mode
+ ; SDAG: bb.0 (%ir-block.0):
+ ; SDAG-NEXT: liveins: $vgpr0, $vgpr1
+ ; SDAG-NEXT: {{ $}}
+ ; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SDAG-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; SDAG-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
+ ; SDAG-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; SDAG-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
+ ; SDAG-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode
+ ; SDAG-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
+ ; SDAG-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GISEL-LABEL: name: call_changes_mode
+ ; GISEL: bb.1 (%ir-block.0):
+ ; GISEL-NEXT: liveins: $vgpr0, $vgpr1
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
+ ; GISEL-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc
+ ; GISEL-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
+ ; GISEL-NEXT: SI_RETURN implicit $vgpr0
call void @maybe_defs_mode()
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret float %val
}
define void @tail_call_changes_mode() #0 {
- ; CHECK-LABEL: name: tail_call_changes_mode
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
- ; CHECK-NEXT: SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode
+ ; SDAG-LABEL: name: tail_call_changes_mode
+ ; SDAG: bb.0 (%ir-block.0):
+ ; SDAG-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
+ ; SDAG-NEXT: SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode
+ ;
+ ; GISEL-LABEL: name: tail_call_changes_mode
+ ; GISEL: bb.1 (%ir-block.0):
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GISEL-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+ ; GISEL-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc
+ ; GISEL-NEXT: SI_TCRETURN [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
tail call void @maybe_defs_mode()
ret void
}
define float @asm_changes_mode(float %x, float %y) #0 {
- ; CHECK-LABEL: name: asm_changes_mode
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
- ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ ; SDAG-LABEL: name: asm_changes_mode
+ ; SDAG: bb.0 (%ir-block.0):
+ ; SDAG-NEXT: liveins: $vgpr0, $vgpr1
+ ; SDAG-NEXT: {{ $}}
+ ; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
+ ; SDAG-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GISEL-LABEL: name: asm_changes_mode
+ ; GISEL: bb.1 (%ir-block.0):
+ ; GISEL-NEXT: liveins: $vgpr0, $vgpr1
+ ; GISEL-NEXT: {{ $}}
+ ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
+ ; GISEL-NEXT: SI_RETURN implicit $vgpr0
call void asm sideeffect "; maybe defs mode", ""()
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret float %val
diff --git a/llvm/test/CodeGen/AMDGPU/call-encoding.ll b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
index 6954c340ca28..6c36c2424a66 100644
--- a/llvm/test/CodeGen/AMDGPU/call-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=hawaii -d - | FileCheck --check-prefixes=GCN,CI %s
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 4df10497bcd2..b250227735bd 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -1,8 +1,13 @@
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
; Make sure to run a GPU with the SGPR allocation bug.
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 61a195f9c314..aed107915815 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,GISEL %s
declare hidden void @external_void_func_void() #3
@@ -223,41 +227,6 @@ define hidden void @void_func_void_clobber_vcc() #2 {
}
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
-; FLATSCR: ; %bb.0:
-; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT: s_add_u32 s8, s4, 8
-; FLATSCR-NEXT: s_addc_u32 s9, s5, 0
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT: s_mov_b32 s14, s12
-; FLATSCR-NEXT: s_mov_b32 s13, s11
-; FLATSCR-NEXT: s_mov_b32 s12, s10
-; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT: s_getpc_b64 s[16:17]
-; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
-; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT: s_mov_b32 s32, 0
-; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; def vcc
-; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: s_mov_b64 s[34:35], vcc
-; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
-; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: s_mov_b64 vcc, s[34:35]
-; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
-; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
-; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
-; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use vcc
-; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: s_endpgm
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
call void @void_func_void_clobber_vcc()
%val0 = load volatile i32, ptr addrspace(1) poison
@@ -463,51 +432,11 @@ define hidden void @void_func_void_clobber_s34() #2 {
}
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_s33:
-; FLATSCR: ; %bb.0:
-; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT: s_mov_b32 s14, s12
-; FLATSCR-NEXT: s_mov_b32 s13, s11
-; FLATSCR-NEXT: s_mov_b32 s12, s10
-; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5]
-; FLATSCR-NEXT: s_getpc_b64 s[16:17]
-; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s33@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s33@rel32@hi+12
-; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT: s_mov_b32 s32, 0
-; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT: s_endpgm
call void @void_func_void_clobber_s33()
ret void
}
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_s34:
-; FLATSCR: ; %bb.0:
-; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT: s_mov_b32 s14, s12
-; FLATSCR-NEXT: s_mov_b32 s13, s11
-; FLATSCR-NEXT: s_mov_b32 s12, s10
-; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5]
-; FLATSCR-NEXT: s_getpc_b64 s[16:17]
-; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s34@rel32@lo+4
-; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s34@rel32@hi+12
-; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT: s_mov_b32 s32, 0
-; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT: s_endpgm
call void @void_func_void_clobber_s34()
ret void
}
@@ -748,3 +677,6 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
index c0f74fd85f0e..21c3696ae98a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
@@ -1,7 +1,12 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+
+; Ideally, we would also like to test GlobalISel with gfx11 but we are currently blocked on llvm-project#166501.
declare void @external_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index ea2bba1673a0..e2ca278d687b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
; A call should be skipped if all lanes are zero, since we don't know
; what side effects should be avoided inside the call.
@@ -6,12 +8,37 @@ define hidden void @func() #1 {
ret void
}
-; GCN-LABEL: {{^}}if_call:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
-; GCN: s_swappc_b64
-; GCN: [[END]]:
define void @if_call(i32 %flag) #0 {
+; GCN-LABEL: if_call:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s20, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_xor_saveexec_b64 s[16:17], -1
+; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: v_writelane_b32 v1, s30, 0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v1, s31, 1
+; GCN-NEXT: s_and_saveexec_b64 s[16:17], vcc
+; GCN-NEXT: s_cbranch_execz .LBB1_2
+; GCN-NEXT: ; %bb.1: ; %call
+; GCN-NEXT: s_getpc_b64 s[18:19]
+; GCN-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
+; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT: .LBB1_2: ; %end
+; GCN-NEXT: s_or_b64 exec, exec, s[16:17]
+; GCN-NEXT: v_readlane_b32 s31, v1, 1
+; GCN-NEXT: v_readlane_b32 s30, v1, 0
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_mov_b32 s33, s20
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %call, label %end
@@ -23,12 +50,20 @@ end:
ret void
}
-; GCN-LABEL: {{^}}if_asm:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
-; GCN: ; sample asm
-; GCN: [[END]]:
define void @if_asm(i32 %flag) #0 {
+; GCN-LABEL: if_asm:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT: s_cbranch_execz .LBB2_2
+; GCN-NEXT: ; %bb.1: ; %call
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; sample asm
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: .LBB2_2: ; %end
+; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %call, label %end
@@ -40,11 +75,58 @@ end:
ret void
}
-; GCN-LABEL: {{^}}if_call_kernel:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB3_2
-; GCN: s_swappc_b64
define amdgpu_kernel void @if_call_kernel() #0 {
+; SDAG-LABEL: if_call_kernel:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_add_i32 s12, s12, s17
+; SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT: s_add_u32 s0, s0, s17
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT: s_cbranch_execz .LBB3_2
+; SDAG-NEXT: ; %bb.1: ; %call
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: s_getpc_b64 s[18:19]
+; SDAG-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
+; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT: s_mov_b32 s12, s14
+; SDAG-NEXT: s_mov_b32 s13, s15
+; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT: .LBB3_2: ; %end
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: if_call_kernel:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_cbranch_execz .LBB3_2
+; GISEL-NEXT: ; %bb.1: ; %call
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT: s_getpc_b64 s[18:19]
+; GISEL-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: .LBB3_2: ; %end
+; GISEL-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%cc = icmp eq i32 %id, 0
br i1 %cc, label %call, label %end
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 675acd0eedfc..a52942cae169 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
; Load argument depends on waitcnt which should be skipped.
define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
@@ -27,24 +28,43 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
; Memory waitcnt with no register dependence on the call
define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_memory_no_dep:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
-; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT: s_add_u32 s0, s0, s11
-; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_getpc_b64 s[8:9]
-; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: global_store_dword v0, v0, s[6:7]
-; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: s_endpgm
+; SDAG-LABEL: call_memory_no_dep:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
+; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT: s_add_u32 s0, s0, s11
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_getpc_b64 s[8:9]
+; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: global_store_dword v0, v0, s[6:7]
+; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: call_memory_no_dep:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT: s_add_u32 s0, s0, s11
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: s_getpc_b64 s[8:9]
+; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: global_store_dword v0, v0, s[6:7]
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT: s_endpgm
store i32 0, ptr addrspace(1) %ptr
call void @func(i32 0)
ret void
@@ -52,46 +72,82 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; Should not wait after the call before memory
define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_no_wait_after_call:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
-; GCN-NEXT: s_add_u32 s0, s0, s11
-; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_getpc_b64 s[8:9]
-; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
-; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: v_mov_b32_e32 v40, 0
-; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: global_store_dword v40, v40, s[34:35]
-; GCN-NEXT: s_endpgm
+; SDAG-LABEL: call_no_wait_after_call:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
+; SDAG-NEXT: s_add_u32 s0, s0, s11
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_getpc_b64 s[8:9]
+; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: v_mov_b32_e32 v40, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT: global_store_dword v40, v40, s[34:35]
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: call_no_wait_after_call:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GISEL-NEXT: s_add_u32 s0, s0, s11
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_getpc_b64 s[8:9]
+; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: global_store_dword v0, v0, s[34:35]
+; GISEL-NEXT: s_endpgm
call void @func(i32 0)
store i32 0, ptr addrspace(1) %ptr
ret void
}
define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_no_wait_after_call_return_val:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
-; GCN-NEXT: s_add_u32 s0, s0, s11
-; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_getpc_b64 s[8:9]
-; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
-; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: v_mov_b32_e32 v40, 0
-; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: global_store_dword v40, v0, s[34:35]
-; GCN-NEXT: s_endpgm
+; SDAG-LABEL: call_no_wait_after_call_return_val:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
+; SDAG-NEXT: s_add_u32 s0, s0, s11
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_getpc_b64 s[8:9]
+; SDAG-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
+; SDAG-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
+; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: v_mov_b32_e32 v40, 0
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT: global_store_dword v40, v0, s[34:35]
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: call_no_wait_after_call_return_val:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GISEL-NEXT: s_add_u32 s0, s0, s11
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_getpc_b64 s[8:9]
+; GISEL-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: global_store_dword v1, v0, s[34:35]
+; GISEL-NEXT: s_endpgm
%rv = call i32 @func.return(i32 0)
store i32 %rv, ptr addrspace(1) %ptr
ret void
@@ -99,22 +155,39 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
; Need to wait for the address dependency
define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_got_load:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT: s_add_u32 s0, s0, s11
-; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0
-; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_mov_b32 s32, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT: s_endpgm
+; SDAG-LABEL: call_got_load:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT: s_add_u32 s0, s0, s11
+; SDAG-NEXT: s_addc_u32 s1, s1, 0
+; SDAG-NEXT: s_getpc_b64 s[6:7]
+; SDAG-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
+; SDAG-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
+; SDAG-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0
+; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: s_mov_b32 s32, 0
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: call_got_load:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT: s_add_u32 s0, s0, s11
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_getpc_b64 s[6:7]
+; GISEL-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
+; GISEL-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
+; GISEL-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT: s_endpgm
call void @got.func(i32 0)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index b96de173dc8c..8d05317162e9 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -702,8 +702,6 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
; CISI-NEXT: s_mov_b32 s10, -1
; CISI-NEXT: s_waitcnt lgkmcnt(0)
; CISI-NEXT: s_add_u32 s4, s4, s6
-; CISI-NEXT: s_cselect_b64 s[12:13], -1, 0
-; CISI-NEXT: s_or_b32 s6, s12, s13
; CISI-NEXT: s_addc_u32 s5, s5, s7
; CISI-NEXT: s_mov_b32 s8, s0
; CISI-NEXT: s_mov_b32 s9, s1
@@ -1674,8 +1672,6 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
; CISI-NEXT: s_mov_b32 s10, -1
; CISI-NEXT: s_waitcnt lgkmcnt(0)
; CISI-NEXT: s_sub_u32 s4, s4, s6
-; CISI-NEXT: s_cselect_b64 s[12:13], -1, 0
-; CISI-NEXT: s_or_b32 s6, s12, s13
; CISI-NEXT: s_subb_u32 s5, s5, s7
; CISI-NEXT: s_mov_b32 s8, s0
; CISI-NEXT: s_mov_b32 s9, s1
diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
index dbdea8e3c533..71af21a11c2c 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
@@ -12,8 +12,6 @@ define i32 @s_add_co_select_user() {
; GFX7-NEXT: s_load_dword s6, s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s7, s6, s6
-; GFX7-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX7-NEXT: s_or_b32 s4, s4, s5
; GFX7-NEXT: s_addc_u32 s8, s6, 0
; GFX7-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], exec
@@ -88,15 +86,13 @@ bb:
define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX7-LABEL: s_add_co_br_user:
; GFX7: ; %bb.0: ; %bb
-; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0
+; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0
; GFX7-NEXT: s_add_i32 s12, s12, s17
; GFX7-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: s_add_u32 s0, s2, s2
-; GFX7-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX7-NEXT: s_or_b32 s0, s0, s1
-; GFX7-NEXT: s_addc_u32 s0, s2, 0
+; GFX7-NEXT: s_add_u32 s1, s0, s0
+; GFX7-NEXT: s_addc_u32 s0, s0, 0
; GFX7-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX7-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
index b8f084d5f82a..db32135939a5 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
@@ -4,14 +4,24 @@
define amdgpu_gs i32 @main() {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_bitcmp1_b32 0, 0
; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: s_cselect_b32 s1, -1, 0
+; CHECK-NEXT: s_or_saveexec_b32 s2, -1
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_readfirstlane_b32 s1, v0
+; CHECK-NEXT: s_mov_b32 exec_lo, s2
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
+; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_xor_b32 s0, s0, -1
-; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: s_wait_alu 0xfffe
+; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_readfirstlane_b32 s0, v1
; CHECK-NEXT: s_wait_alu 0xf1ff
; CHECK-NEXT: ; return to shader part epilog
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
index eefc7811d42b..357234080235 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
@@ -263,7 +263,7 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_neg2048(ptr inreg %sbase, i32 %v
; Uniformity edge cases
; --------------------------------------------------------------------------------
-@ptr.in.lds = internal addrspace(3) global ptr undef
+@ptr.in.lds = internal addrspace(3) global ptr poison
; Base pointer is uniform, but also in VGPRs
define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i32 %data) {
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll
index 32888d2acf1c..3d0e2875e91a 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll
@@ -54,7 +54,7 @@ define amdgpu_ps void @flat_store_saddr_i8_zext_vgpr_offset_neg2048(ptr inreg %s
; Uniformity edge cases
; --------------------------------------------------------------------------------
-@ptr.in.lds = internal addrspace(3) global ptr undef
+@ptr.in.lds = internal addrspace(3) global ptr poison
; Base pointer is uniform, but also in VGPRs
define amdgpu_ps void @flat_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 02d29909c661..d1ba892d7f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -396,7 +396,8 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 0x4040000000000000
+; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
+; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
@@ -455,13 +456,14 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 32
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -488,13 +490,15 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
-; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000
+; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
@@ -584,17 +588,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
@@ -624,17 +628,17 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17
+; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s3
-; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
index fba42c494343..fa452f3717f0 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -2277,3 +2277,181 @@ body: |
S_ENDPGM 0
...
+
+---
+name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000
+body: |
+ ; GCN-LABEL: name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]]
+ ; GCN-NEXT: S_CMP_LG_U32 [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B64_]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B64_]].sub1
+ ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ %0:vgpr_32 = IMPLICIT_DEF
+ %2:sreg_32 = COPY %0
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ %31:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ %40:sreg_32_xm0_xexec = COPY %31.sub0:sreg_64_xexec
+ %41:sreg_32_xm0_xexec = COPY %31.sub1:sreg_64_xexec
+ %sgpr4:sreg_32 = S_OR_B32 %40:sreg_32_xm0_xexec, %41:sreg_32_xm0_xexec, implicit-def $scc
+ S_CMP_LG_U32 %sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+---
+# Do not delete s_or_b32 because of intervening def of scc
+name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000_cant_optimize_intervening
+body: |
+ ; GCN-LABEL: name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000_cant_optimize_intervening
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]]
+ ; GCN-NEXT: S_CMP_LG_U32 [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ ; GCN-NEXT: S_CMP_LG_U32 [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B64_]].sub0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B64_]].sub1
+ ; GCN-NEXT: %sgpr4:sreg_32 = S_OR_B32 [[COPY1]], [[COPY2]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ %0:vgpr_32 = IMPLICIT_DEF
+ %2:sreg_32 = COPY %0
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ %31:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ %40:sreg_32_xm0_xexec = COPY %31.sub0:sreg_64_xexec
+ %41:sreg_32_xm0_xexec = COPY %31.sub1:sreg_64_xexec
+ %sgpr4:sreg_32 = S_OR_B32 %40:sreg_32_xm0_xexec, %41:sreg_32_xm0_xexec, implicit-def $scc
+ S_CMP_LG_U32 %sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Do not delete s_or_b32 since both operands are sub1.
+name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000_cant_optimize
+body: |
+ ; GCN-LABEL: name: s_cselect_b64_s_or_b32_s_cmp_lg_u32_0x00000000_cant_optimize
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]]
+ ; GCN-NEXT: S_CMP_LG_U32 [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 1, 0, implicit $scc
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B64_]].sub1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B64_]].sub1
+ ; GCN-NEXT: %sgpr4:sreg_32 = S_OR_B32 [[COPY1]], [[COPY2]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ %0:vgpr_32 = IMPLICIT_DEF
+ %2:sreg_32 = COPY %0
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ %31:sreg_64_xexec = S_CSELECT_B64 1, 0, implicit $scc
+ %40:sreg_32_xm0_xexec = COPY %31.sub1:sreg_64_xexec
+ %41:sreg_32 = COPY %31.sub1:sreg_64_xexec
+ %sgpr4:sreg_32 = S_OR_B32 %40:sreg_32_xm0_xexec, %41:sreg_32, implicit-def $scc
+ S_CMP_LG_U32 %sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: s_cselect_b64_undef_s_or_b32_s_cmp_lg_u32_0x00000000
+body: |
+ ; GCN-LABEL: name: s_cselect_b64_undef_s_or_b32_s_cmp_lg_u32_0x00000000
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]]
+ ; GCN-NEXT: S_CMP_LG_U32 [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ ; GCN-NEXT: %sgpr4:sreg_32 = S_OR_B32 undef %4:sreg_32_xm0_xexec, undef %5:sreg_32_xm0_xexec, implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ %0:vgpr_32 = IMPLICIT_DEF
+ %2:sreg_32 = COPY %0
+ S_CMP_LG_U32 %2, 0, implicit-def $scc
+ %31:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+ %sgpr4:sreg_32 = S_OR_B32 undef %40:sreg_32_xm0_xexec, undef %41:sreg_32_xm0_xexec, implicit-def $scc
+ S_CMP_LG_U32 %sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll b/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
index 118c47e68070..cac1fe9605a1 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
@@ -46,7 +46,7 @@ define amdgpu_ps void @_amdgpu_ps_main(float %arg) {
; GFX900-NEXT: s_mov_b64 exec, 0
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: .LBB0_5: ; %bb6
; GFX900-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -75,7 +75,7 @@ bb5:
bb6:
%i7 = phi float [ 0.000000e+00, %bb5 ], [ %i3, %bb1 ]
%i8 = phi float [ 0.000000e+00, %bb5 ], [ 1.000000e+00, %bb1 ]
- %i9 = phi float [ undef, %bb5 ], [ %i4, %bb1 ]
+ %i9 = phi float [ poison, %bb5 ], [ %i4, %bb1 ]
%i10 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %i7)
%i11 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %i8, float %i9)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %i10, <2 x half> %i11, i1 false, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 71f5a94a7f24..74a6d7fe3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -8,7 +8,6 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_ashr_i32 s8, s1, 31
; GCN-NEXT: s_add_u32 s0, s0, s8
@@ -17,8 +16,8 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_xor_b64 s[10:11], s[0:1], s[8:9]
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s10
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s11
-; GCN-NEXT: s_sub_u32 s12, 0, s10
-; GCN-NEXT: s_subb_u32 s13, 0, s11
+; GCN-NEXT: s_sub_u32 s0, 0, s10
+; GCN-NEXT: s_subb_u32 s1, 0, s11
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -27,128 +26,121 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s12, v0
-; GCN-NEXT: v_readfirstlane_b32 s14, v1
-; GCN-NEXT: v_readfirstlane_b32 s0, v0
-; GCN-NEXT: s_mul_i32 s1, s12, s14
-; GCN-NEXT: v_readfirstlane_b32 s17, v2
-; GCN-NEXT: s_mul_i32 s15, s13, s0
-; GCN-NEXT: s_mul_i32 s16, s12, s0
-; GCN-NEXT: s_add_i32 s1, s17, s1
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s16
-; GCN-NEXT: s_add_i32 s1, s1, s15
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s1
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s16
-; GCN-NEXT: v_readfirstlane_b32 s15, v3
-; GCN-NEXT: s_mul_i32 s17, s0, s1
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s1
-; GCN-NEXT: s_add_u32 s15, s15, s17
-; GCN-NEXT: v_readfirstlane_b32 s17, v0
-; GCN-NEXT: s_addc_u32 s17, 0, s17
-; GCN-NEXT: s_mul_i32 s16, s14, s16
-; GCN-NEXT: v_readfirstlane_b32 s18, v4
-; GCN-NEXT: s_add_u32 s15, s15, s16
-; GCN-NEXT: s_addc_u32 s15, s17, s18
-; GCN-NEXT: v_readfirstlane_b32 s16, v1
-; GCN-NEXT: s_addc_u32 s16, s16, 0
-; GCN-NEXT: s_mul_i32 s1, s14, s1
-; GCN-NEXT: s_add_u32 s1, s15, s1
-; GCN-NEXT: s_addc_u32 s15, 0, s16
-; GCN-NEXT: s_add_u32 s16, s0, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s16
-; GCN-NEXT: v_mul_hi_u32 v0, s12, v0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s14, s14, s15
-; GCN-NEXT: s_mul_i32 s0, s12, s14
-; GCN-NEXT: v_readfirstlane_b32 s1, v0
-; GCN-NEXT: s_add_i32 s0, s1, s0
-; GCN-NEXT: s_mul_i32 s13, s13, s16
-; GCN-NEXT: s_mul_i32 s1, s12, s16
-; GCN-NEXT: s_add_i32 s0, s0, s13
-; GCN-NEXT: v_mov_b32_e32 v2, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: v_mul_hi_u32 v3, s14, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s16, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s14, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s16, v0
-; GCN-NEXT: s_mul_i32 s13, s16, s0
-; GCN-NEXT: v_readfirstlane_b32 s17, v2
-; GCN-NEXT: s_add_u32 s13, s17, s13
-; GCN-NEXT: v_readfirstlane_b32 s15, v0
-; GCN-NEXT: s_mul_i32 s1, s14, s1
-; GCN-NEXT: s_addc_u32 s15, 0, s15
-; GCN-NEXT: v_readfirstlane_b32 s12, v3
-; GCN-NEXT: s_add_u32 s1, s13, s1
-; GCN-NEXT: s_addc_u32 s1, s15, s12
+; GCN-NEXT: v_mul_hi_u32 v2, s0, v0
; GCN-NEXT: v_readfirstlane_b32 s12, v1
-; GCN-NEXT: s_addc_u32 s12, s12, 0
-; GCN-NEXT: s_mul_i32 s0, s14, s0
-; GCN-NEXT: s_add_u32 s0, s1, s0
-; GCN-NEXT: s_addc_u32 s12, 0, s12
-; GCN-NEXT: s_add_u32 s15, s16, s0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s14, s14, s12
+; GCN-NEXT: v_readfirstlane_b32 s2, v0
+; GCN-NEXT: s_mul_i32 s13, s0, s12
+; GCN-NEXT: v_readfirstlane_b32 s16, v2
+; GCN-NEXT: s_mul_i32 s14, s1, s2
+; GCN-NEXT: s_mul_i32 s15, s0, s2
+; GCN-NEXT: s_add_i32 s13, s16, s13
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s15
+; GCN-NEXT: s_add_i32 s13, s13, s14
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s13
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s15
+; GCN-NEXT: v_readfirstlane_b32 s14, v3
+; GCN-NEXT: s_mul_i32 s16, s2, s13
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s13
+; GCN-NEXT: s_add_u32 s14, s14, s16
+; GCN-NEXT: v_readfirstlane_b32 s16, v0
+; GCN-NEXT: s_mul_i32 s15, s12, s15
+; GCN-NEXT: s_addc_u32 s16, 0, s16
+; GCN-NEXT: v_readfirstlane_b32 s17, v4
+; GCN-NEXT: s_add_u32 s14, s14, s15
+; GCN-NEXT: s_addc_u32 s14, s16, s17
+; GCN-NEXT: v_readfirstlane_b32 s15, v1
+; GCN-NEXT: s_addc_u32 s15, s15, 0
+; GCN-NEXT: s_mul_i32 s13, s12, s13
+; GCN-NEXT: s_add_u32 s13, s14, s13
+; GCN-NEXT: s_addc_u32 s14, 0, s15
+; GCN-NEXT: s_add_u32 s13, s2, s13
+; GCN-NEXT: v_mov_b32_e32 v0, s13
+; GCN-NEXT: v_mul_hi_u32 v0, s0, v0
+; GCN-NEXT: s_addc_u32 s12, s12, s14
+; GCN-NEXT: s_mul_i32 s14, s0, s12
+; GCN-NEXT: s_mul_i32 s1, s1, s13
+; GCN-NEXT: v_readfirstlane_b32 s15, v0
+; GCN-NEXT: s_add_i32 s14, s15, s14
+; GCN-NEXT: s_mul_i32 s0, s0, s13
+; GCN-NEXT: s_add_i32 s1, s14, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: v_mov_b32_e32 v0, s1
+; GCN-NEXT: v_mul_hi_u32 v3, s12, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s13, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s12, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s13, v0
+; GCN-NEXT: s_mul_i32 s15, s13, s1
+; GCN-NEXT: v_readfirstlane_b32 s17, v2
+; GCN-NEXT: s_add_u32 s15, s17, s15
+; GCN-NEXT: v_readfirstlane_b32 s16, v0
+; GCN-NEXT: s_mul_i32 s0, s12, s0
+; GCN-NEXT: s_addc_u32 s16, 0, s16
+; GCN-NEXT: v_readfirstlane_b32 s14, v3
+; GCN-NEXT: s_add_u32 s0, s15, s0
+; GCN-NEXT: s_addc_u32 s0, s16, s14
+; GCN-NEXT: v_readfirstlane_b32 s14, v1
+; GCN-NEXT: s_addc_u32 s14, s14, 0
+; GCN-NEXT: s_mul_i32 s1, s12, s1
+; GCN-NEXT: s_add_u32 s0, s0, s1
+; GCN-NEXT: s_addc_u32 s1, 0, s14
+; GCN-NEXT: s_add_u32 s14, s13, s0
+; GCN-NEXT: s_addc_u32 s15, s12, s1
; GCN-NEXT: s_ashr_i32 s12, s7, 31
; GCN-NEXT: s_add_u32 s0, s6, s12
; GCN-NEXT: s_mov_b32 s13, s12
; GCN-NEXT: s_addc_u32 s1, s7, s12
; GCN-NEXT: s_xor_b64 s[6:7], s[0:1], s[12:13]
-; GCN-NEXT: v_mov_b32_e32 v0, s14
+; GCN-NEXT: v_mov_b32_e32 v0, s15
; GCN-NEXT: v_mul_hi_u32 v1, s6, v0
-; GCN-NEXT: v_mov_b32_e32 v2, s15
+; GCN-NEXT: v_mov_b32_e32 v2, s14
; GCN-NEXT: v_mul_hi_u32 v3, s6, v2
; GCN-NEXT: s_mov_b32 s0, s4
; GCN-NEXT: v_readfirstlane_b32 s4, v1
; GCN-NEXT: v_mul_hi_u32 v1, s7, v2
-; GCN-NEXT: s_mul_i32 s1, s6, s14
+; GCN-NEXT: s_mul_i32 s1, s6, s15
; GCN-NEXT: v_readfirstlane_b32 s16, v3
; GCN-NEXT: v_mul_hi_u32 v0, s7, v0
; GCN-NEXT: s_add_u32 s1, s16, s1
; GCN-NEXT: s_addc_u32 s4, 0, s4
-; GCN-NEXT: s_mul_i32 s15, s7, s15
+; GCN-NEXT: s_mul_i32 s14, s7, s14
; GCN-NEXT: v_readfirstlane_b32 s16, v1
-; GCN-NEXT: s_add_u32 s1, s1, s15
+; GCN-NEXT: s_add_u32 s1, s1, s14
; GCN-NEXT: s_addc_u32 s1, s4, s16
; GCN-NEXT: v_readfirstlane_b32 s4, v0
; GCN-NEXT: s_addc_u32 s4, s4, 0
-; GCN-NEXT: s_mul_i32 s14, s7, s14
-; GCN-NEXT: s_add_u32 s16, s1, s14
-; GCN-NEXT: v_mov_b32_e32 v0, s16
+; GCN-NEXT: s_mul_i32 s14, s7, s15
+; GCN-NEXT: s_add_u32 s14, s1, s14
+; GCN-NEXT: v_mov_b32_e32 v0, s14
; GCN-NEXT: v_mul_hi_u32 v0, s10, v0
-; GCN-NEXT: s_addc_u32 s17, 0, s4
+; GCN-NEXT: s_addc_u32 s15, 0, s4
; GCN-NEXT: s_mov_b32 s1, s5
-; GCN-NEXT: s_mul_i32 s4, s10, s17
+; GCN-NEXT: s_mul_i32 s4, s10, s15
; GCN-NEXT: v_readfirstlane_b32 s5, v0
; GCN-NEXT: s_add_i32 s4, s5, s4
-; GCN-NEXT: s_mul_i32 s5, s11, s16
-; GCN-NEXT: s_add_i32 s18, s4, s5
-; GCN-NEXT: s_sub_i32 s14, s7, s18
-; GCN-NEXT: s_mul_i32 s4, s10, s16
+; GCN-NEXT: s_mul_i32 s5, s11, s14
+; GCN-NEXT: s_add_i32 s16, s4, s5
+; GCN-NEXT: s_sub_i32 s17, s7, s16
+; GCN-NEXT: s_mul_i32 s4, s10, s14
; GCN-NEXT: s_sub_u32 s6, s6, s4
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s15, s4, s5
-; GCN-NEXT: s_subb_u32 s19, s14, s11
-; GCN-NEXT: s_sub_u32 s20, s6, s10
-; GCN-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GCN-NEXT: s_or_b32 s14, s14, s15
-; GCN-NEXT: s_subb_u32 s14, s19, 0
-; GCN-NEXT: s_cmp_ge_u32 s14, s11
-; GCN-NEXT: s_cselect_b32 s15, -1, 0
-; GCN-NEXT: s_cmp_ge_u32 s20, s10
+; GCN-NEXT: s_subb_u32 s17, s17, s11
+; GCN-NEXT: s_sub_u32 s18, s6, s10
+; GCN-NEXT: s_subb_u32 s17, s17, 0
+; GCN-NEXT: s_cmp_ge_u32 s17, s11
; GCN-NEXT: s_cselect_b32 s19, -1, 0
-; GCN-NEXT: s_cmp_eq_u32 s14, s11
-; GCN-NEXT: s_cselect_b32 s14, s19, s15
-; GCN-NEXT: s_add_u32 s15, s16, 1
-; GCN-NEXT: s_addc_u32 s19, s17, 0
-; GCN-NEXT: s_add_u32 s20, s16, 2
-; GCN-NEXT: s_addc_u32 s21, s17, 0
-; GCN-NEXT: s_cmp_lg_u32 s14, 0
-; GCN-NEXT: s_cselect_b32 s14, s20, s15
-; GCN-NEXT: s_cselect_b32 s15, s21, s19
+; GCN-NEXT: s_cmp_ge_u32 s18, s10
+; GCN-NEXT: s_cselect_b32 s18, -1, 0
+; GCN-NEXT: s_cmp_eq_u32 s17, s11
+; GCN-NEXT: s_cselect_b32 s17, s18, s19
+; GCN-NEXT: s_add_u32 s18, s14, 1
+; GCN-NEXT: s_addc_u32 s19, s15, 0
+; GCN-NEXT: s_add_u32 s20, s14, 2
+; GCN-NEXT: s_addc_u32 s21, s15, 0
+; GCN-NEXT: s_cmp_lg_u32 s17, 0
+; GCN-NEXT: s_cselect_b32 s17, s20, s18
+; GCN-NEXT: s_cselect_b32 s18, s21, s19
; GCN-NEXT: s_or_b32 s4, s4, s5
-; GCN-NEXT: s_subb_u32 s4, s7, s18
+; GCN-NEXT: s_subb_u32 s4, s7, s16
; GCN-NEXT: s_cmp_ge_u32 s4, s11
; GCN-NEXT: s_cselect_b32 s5, -1, 0
; GCN-NEXT: s_cmp_ge_u32 s6, s10
@@ -156,13 +148,14 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_cmp_eq_u32 s4, s11
; GCN-NEXT: s_cselect_b32 s4, s6, s5
; GCN-NEXT: s_cmp_lg_u32 s4, 0
-; GCN-NEXT: s_cselect_b32 s5, s15, s17
-; GCN-NEXT: s_cselect_b32 s4, s14, s16
+; GCN-NEXT: s_cselect_b32 s5, s18, s15
+; GCN-NEXT: s_cselect_b32 s4, s17, s14
; GCN-NEXT: s_xor_b64 s[6:7], s[12:13], s[8:9]
; GCN-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
; GCN-NEXT: s_sub_u32 s4, s4, s6
; GCN-NEXT: s_subb_u32 s5, s5, s7
; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -202,8 +195,6 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s18, s16, 1
-; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-IR-NEXT: s_or_b32 s10, s10, s11
; GCN-IR-NEXT: s_addc_u32 s10, s17, 0
; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
; GCN-IR-NEXT: s_sub_i32 s16, 63, s16
@@ -235,8 +226,6 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: s_sub_u32 s16, s16, s20
; GCN-IR-NEXT: s_subb_u32 s17, s17, s21
; GCN-IR-NEXT: s_add_u32 s14, s14, 1
-; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0
-; GCN-IR-NEXT: s_or_b32 s20, s20, s21
; GCN-IR-NEXT: s_addc_u32 s15, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[12:13], s[8:9]
@@ -1150,8 +1139,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s6
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s7
; GCN-NEXT: s_sub_u32 s2, 0, s6
-; GCN-NEXT: s_subb_u32 s10, 0, s7
-; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_subb_u32 s8, 0, s7
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -1161,115 +1149,109 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_mul_hi_u32 v2, s2, v0
-; GCN-NEXT: v_readfirstlane_b32 s11, v1
-; GCN-NEXT: v_readfirstlane_b32 s8, v0
-; GCN-NEXT: s_mul_i32 s9, s2, s11
-; GCN-NEXT: v_readfirstlane_b32 s14, v2
-; GCN-NEXT: s_mul_i32 s12, s10, s8
-; GCN-NEXT: s_mul_i32 s13, s2, s8
-; GCN-NEXT: s_add_i32 s9, s14, s9
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s13
-; GCN-NEXT: s_add_i32 s9, s9, s12
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s9
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s13
-; GCN-NEXT: v_readfirstlane_b32 s12, v3
-; GCN-NEXT: s_mul_i32 s15, s8, s9
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s9
-; GCN-NEXT: s_add_u32 s12, s12, s15
-; GCN-NEXT: v_readfirstlane_b32 s15, v0
-; GCN-NEXT: s_mul_i32 s13, s11, s13
-; GCN-NEXT: s_addc_u32 s15, 0, s15
-; GCN-NEXT: v_readfirstlane_b32 s14, v4
-; GCN-NEXT: s_add_u32 s12, s12, s13
-; GCN-NEXT: s_addc_u32 s12, s15, s14
-; GCN-NEXT: v_readfirstlane_b32 s13, v1
-; GCN-NEXT: s_addc_u32 s13, s13, 0
-; GCN-NEXT: s_mul_i32 s9, s11, s9
-; GCN-NEXT: s_add_u32 s9, s12, s9
-; GCN-NEXT: s_addc_u32 s12, 0, s13
-; GCN-NEXT: s_add_u32 s13, s8, s9
-; GCN-NEXT: v_mov_b32_e32 v0, s13
+; GCN-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NEXT: v_readfirstlane_b32 s3, v0
+; GCN-NEXT: s_mul_i32 s10, s2, s9
+; GCN-NEXT: v_readfirstlane_b32 s13, v2
+; GCN-NEXT: s_mul_i32 s11, s8, s3
+; GCN-NEXT: s_mul_i32 s12, s2, s3
+; GCN-NEXT: s_add_i32 s10, s13, s10
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s12
+; GCN-NEXT: s_add_i32 s10, s10, s11
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s10
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s12
+; GCN-NEXT: v_readfirstlane_b32 s11, v3
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s10
+; GCN-NEXT: s_mul_i32 s14, s3, s10
+; GCN-NEXT: s_add_u32 s11, s11, s14
+; GCN-NEXT: v_readfirstlane_b32 s14, v0
+; GCN-NEXT: s_mul_i32 s12, s9, s12
+; GCN-NEXT: s_addc_u32 s14, 0, s14
+; GCN-NEXT: v_readfirstlane_b32 s13, v4
+; GCN-NEXT: s_add_u32 s11, s11, s12
+; GCN-NEXT: v_readfirstlane_b32 s15, v1
+; GCN-NEXT: s_addc_u32 s11, s14, s13
+; GCN-NEXT: s_addc_u32 s12, s15, 0
+; GCN-NEXT: s_mul_i32 s10, s9, s10
+; GCN-NEXT: s_add_u32 s10, s11, s10
+; GCN-NEXT: s_addc_u32 s11, 0, s12
+; GCN-NEXT: s_add_u32 s10, s3, s10
+; GCN-NEXT: v_mov_b32_e32 v0, s10
; GCN-NEXT: v_mul_hi_u32 v0, s2, v0
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_addc_u32 s11, s11, s12
-; GCN-NEXT: s_mul_i32 s8, s2, s11
-; GCN-NEXT: v_readfirstlane_b32 s9, v0
-; GCN-NEXT: s_add_i32 s8, s9, s8
-; GCN-NEXT: s_mul_i32 s10, s10, s13
-; GCN-NEXT: s_mul_i32 s2, s2, s13
-; GCN-NEXT: s_add_i32 s8, s8, s10
+; GCN-NEXT: s_addc_u32 s9, s9, s11
+; GCN-NEXT: s_mul_i32 s11, s2, s9
+; GCN-NEXT: s_mul_i32 s8, s8, s10
+; GCN-NEXT: v_readfirstlane_b32 s12, v0
+; GCN-NEXT: s_add_i32 s11, s12, s11
+; GCN-NEXT: s_mul_i32 s2, s2, s10
+; GCN-NEXT: s_add_i32 s8, s11, s8
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NEXT: v_mul_hi_u32 v3, s11, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s13, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s11, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s13, v0
-; GCN-NEXT: s_mul_i32 s10, s13, s8
+; GCN-NEXT: v_mul_hi_u32 v3, s9, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s10, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s9, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s10, v0
+; GCN-NEXT: s_mul_i32 s12, s10, s8
; GCN-NEXT: v_readfirstlane_b32 s14, v2
-; GCN-NEXT: s_add_u32 s10, s14, s10
-; GCN-NEXT: v_readfirstlane_b32 s12, v0
-; GCN-NEXT: s_mul_i32 s2, s11, s2
-; GCN-NEXT: s_addc_u32 s12, 0, s12
-; GCN-NEXT: v_readfirstlane_b32 s9, v3
-; GCN-NEXT: s_add_u32 s2, s10, s2
-; GCN-NEXT: s_addc_u32 s2, s12, s9
-; GCN-NEXT: v_readfirstlane_b32 s9, v1
-; GCN-NEXT: s_addc_u32 s9, s9, 0
-; GCN-NEXT: s_mul_i32 s8, s11, s8
+; GCN-NEXT: s_add_u32 s12, s14, s12
+; GCN-NEXT: v_readfirstlane_b32 s13, v0
+; GCN-NEXT: s_mul_i32 s2, s9, s2
+; GCN-NEXT: s_addc_u32 s13, 0, s13
+; GCN-NEXT: v_readfirstlane_b32 s11, v3
+; GCN-NEXT: s_add_u32 s2, s12, s2
+; GCN-NEXT: s_addc_u32 s2, s13, s11
+; GCN-NEXT: v_readfirstlane_b32 s11, v1
+; GCN-NEXT: s_addc_u32 s11, s11, 0
+; GCN-NEXT: s_mul_i32 s8, s9, s8
; GCN-NEXT: s_add_u32 s2, s2, s8
-; GCN-NEXT: s_addc_u32 s10, 0, s9
-; GCN-NEXT: s_add_u32 s2, s13, s2
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_addc_u32 s8, s11, s10
+; GCN-NEXT: s_addc_u32 s8, 0, s11
+; GCN-NEXT: s_add_u32 s2, s10, s2
+; GCN-NEXT: s_addc_u32 s8, s9, s8
; GCN-NEXT: v_mul_hi_u32 v1, s2, 24
; GCN-NEXT: v_mul_hi_u32 v0, s8, 24
; GCN-NEXT: s_mul_i32 s8, s8, 24
-; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: v_readfirstlane_b32 s10, v1
; GCN-NEXT: v_readfirstlane_b32 s9, v0
; GCN-NEXT: s_add_u32 s8, s10, s8
-; GCN-NEXT: s_addc_u32 s12, 0, s9
-; GCN-NEXT: v_mov_b32_e32 v0, s12
+; GCN-NEXT: s_addc_u32 s10, 0, s9
+; GCN-NEXT: v_mov_b32_e32 v0, s10
; GCN-NEXT: v_mul_hi_u32 v0, s6, v0
-; GCN-NEXT: s_mul_i32 s8, s7, s12
+; GCN-NEXT: s_mul_i32 s8, s7, s10
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_readfirstlane_b32 s9, v0
-; GCN-NEXT: s_add_i32 s13, s9, s8
-; GCN-NEXT: s_sub_i32 s10, 0, s13
-; GCN-NEXT: s_mul_i32 s8, s6, s12
-; GCN-NEXT: s_sub_u32 s14, 24, s8
+; GCN-NEXT: s_add_i32 s11, s9, s8
+; GCN-NEXT: s_sub_i32 s12, 0, s11
+; GCN-NEXT: s_mul_i32 s8, s6, s10
+; GCN-NEXT: s_sub_u32 s13, 24, s8
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s11, s8, s9
-; GCN-NEXT: s_subb_u32 s15, s10, s7
-; GCN-NEXT: s_sub_u32 s16, s14, s6
-; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s10, s10, s11
-; GCN-NEXT: s_subb_u32 s10, s15, 0
-; GCN-NEXT: s_cmp_ge_u32 s10, s7
-; GCN-NEXT: s_cselect_b32 s11, -1, 0
-; GCN-NEXT: s_cmp_ge_u32 s16, s6
+; GCN-NEXT: s_subb_u32 s12, s12, s7
+; GCN-NEXT: s_sub_u32 s14, s13, s6
+; GCN-NEXT: s_subb_u32 s12, s12, 0
+; GCN-NEXT: s_cmp_ge_u32 s12, s7
; GCN-NEXT: s_cselect_b32 s15, -1, 0
-; GCN-NEXT: s_cmp_eq_u32 s10, s7
-; GCN-NEXT: s_cselect_b32 s10, s15, s11
-; GCN-NEXT: s_add_u32 s11, s12, 1
+; GCN-NEXT: s_cmp_ge_u32 s14, s6
+; GCN-NEXT: s_cselect_b32 s14, -1, 0
+; GCN-NEXT: s_cmp_eq_u32 s12, s7
+; GCN-NEXT: s_cselect_b32 s12, s14, s15
+; GCN-NEXT: s_add_u32 s14, s10, 1
; GCN-NEXT: s_addc_u32 s15, 0, 0
-; GCN-NEXT: s_add_u32 s16, s12, 2
+; GCN-NEXT: s_add_u32 s16, s10, 2
; GCN-NEXT: s_addc_u32 s17, 0, 0
-; GCN-NEXT: s_cmp_lg_u32 s10, 0
-; GCN-NEXT: s_cselect_b32 s10, s16, s11
-; GCN-NEXT: s_cselect_b32 s11, s17, s15
+; GCN-NEXT: s_cmp_lg_u32 s12, 0
+; GCN-NEXT: s_cselect_b32 s12, s16, s14
+; GCN-NEXT: s_cselect_b32 s14, s17, s15
; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s8, 0, s13
+; GCN-NEXT: s_subb_u32 s8, 0, s11
; GCN-NEXT: s_cmp_ge_u32 s8, s7
; GCN-NEXT: s_cselect_b32 s9, -1, 0
-; GCN-NEXT: s_cmp_ge_u32 s14, s6
+; GCN-NEXT: s_cmp_ge_u32 s13, s6
; GCN-NEXT: s_cselect_b32 s6, -1, 0
; GCN-NEXT: s_cmp_eq_u32 s8, s7
; GCN-NEXT: s_cselect_b32 s6, s6, s9
; GCN-NEXT: s_cmp_lg_u32 s6, 0
-; GCN-NEXT: s_cselect_b32 s7, s11, 0
-; GCN-NEXT: s_cselect_b32 s6, s10, s12
+; GCN-NEXT: s_cselect_b32 s7, s14, 0
+; GCN-NEXT: s_cselect_b32 s6, s12, s10
; GCN-NEXT: s_xor_b64 s[6:7], s[6:7], s[4:5]
; GCN-NEXT: s_sub_u32 s6, s6, s4
; GCN-NEXT: s_subb_u32 s7, s7, s4
@@ -1303,8 +1285,6 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s12, s10, 1
-; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-IR-NEXT: s_or_b32 s8, s8, s9
; GCN-IR-NEXT: s_addc_u32 s8, s11, 0
; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
; GCN-IR-NEXT: s_sub_i32 s10, 63, s10
@@ -1335,8 +1315,6 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s12, s12, s18
; GCN-IR-NEXT: s_subb_u32 s13, s13, s19
; GCN-IR-NEXT: s_add_u32 s16, s16, 1
-; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
-; GCN-IR-NEXT: s_or_b32 s18, s18, s19
; GCN-IR-NEXT: s_addc_u32 s17, s17, 0
; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[10:11], s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll
index 9f4a6f2f63f1..3e507a0c5889 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll
+++ b/llvm/test/CodeGen/AMDGPU/shlN_add.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s
+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
@@ -7,6 +12,24 @@
; Test gfx9+ s_shl[1-4]_add_u32 pattern matching
define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl1_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl1_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl1_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl1_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1
@@ -28,6 +51,24 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
}
define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl2_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl2_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl2_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl2_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1
@@ -49,6 +90,24 @@ define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
}
define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl3_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl3_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl3_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl3_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1
@@ -70,6 +129,24 @@ define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
}
define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl4_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl4_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl4_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl4_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1
@@ -102,6 +179,25 @@ define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
}
define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: v_shl1_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_shl1_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_shl1_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_shl1_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,6 +222,25 @@ define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
}
define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: v_shl2_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_shl2_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_shl2_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_shl2_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -150,6 +265,25 @@ define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
}
define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: v_shl3_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_shl3_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_shl3_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_shl3_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,6 +308,25 @@ define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
}
define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: v_shl4_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_shl4_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_shl4_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_shl4_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +351,25 @@ define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
}
define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: v_shl5_add_u32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_shl5_add_u32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_shl5_add_u32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_shl5_add_u32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -224,6 +396,22 @@ define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
; FIXME: Use v_lshl_add_u32
; shift is scalar, but add is vector.
define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: shl1_add_u32_vgpr1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: shl1_add_u32_vgpr1:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: shl1_add_u32_vgpr1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: shl1_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
@@ -248,6 +436,22 @@ define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
}
define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: shl2_add_u32_vgpr1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: shl2_add_u32_vgpr1:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: shl2_add_u32_vgpr1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: shl2_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
@@ -272,6 +476,22 @@ define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
}
define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: shl3_add_u32_vgpr1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: shl3_add_u32_vgpr1:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: shl3_add_u32_vgpr1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: shl3_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 3
@@ -296,6 +516,22 @@ define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
}
define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: shl4_add_u32_vgpr1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: shl4_add_u32_vgpr1:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: shl4_add_u32_vgpr1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: shl4_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
@@ -320,6 +556,22 @@ define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
}
define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
+; GFX9-SDAG-LABEL: shl5_add_u32_vgpr1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: shl5_add_u32_vgpr1:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 5
+; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: shl5_add_u32_vgpr1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: shl5_add_u32_vgpr1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl_b32 s0, s0, 5
@@ -344,6 +596,30 @@ define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
}
define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl1_add_u32_v2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 1
+; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl1_add_u32_v2:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 1
+; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl1_add_u32_v2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 1
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl1_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2
@@ -369,6 +645,30 @@ define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i
}
define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl2_add_u32_v2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 2
+; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl2_add_u32_v2:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 2
+; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl2_add_u32_v2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl2_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
@@ -394,6 +694,30 @@ define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i
}
define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl3_add_u32_v2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 3
+; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl3_add_u32_v2:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 3
+; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl3_add_u32_v2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3
+; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 3
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl3_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2
@@ -419,6 +743,30 @@ define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i
}
define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl4_add_u32_v2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl4_add_u32_v2:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl4_add_u32_v2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4
+; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl4_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2
@@ -444,6 +792,30 @@ define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i
}
define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
+; GFX9-SDAG-LABEL: s_shl_2_4_add_u32_v2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX9-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX8-SDAG-LABEL: s_shl_2_4_add_u32_v2:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX8-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-SDAG-LABEL: s_shl_2_4_add_u32_v2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4
+; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2
+; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_shl_2_4_add_u32_v2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index ef96944abef0..586579fcaeb9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -20,33 +20,38 @@ define void @test() {
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: .LBB0_3: ; %bb.3
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: ; implicit-def: $sgpr4
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_readfirstlane_b32 s6, v0
; CHECK-NEXT: s_mov_b64 s[4:5], -1
+; CHECK-NEXT: s_mov_b32 s7, 0
+; CHECK-NEXT: s_cmp_eq_u32 s6, s7
; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_mov_b64 s[10:11], exec
+; CHECK-NEXT: s_mov_b64 exec, -1
; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
; CHECK-NEXT: ; %bb.4: ; %bb.4
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: .LBB0_5: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
+; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
+; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: v_readlane_b32 s4, v1, 0
; CHECK-NEXT: v_readlane_b32 s5, v1, 1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index 364598f7cf6c..5aafb0f576fb 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -31,8 +31,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8
; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %117:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: KILL undef %117:sgpr_128
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: KILL undef %125:sgpr_128
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
@@ -44,85 +44,87 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.71, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) poison`, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 64, 0 :: (invariant load (s128) from %ir.88, addrspace 4)
; CHECK-NEXT: KILL undef %74:sreg_64
; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
- ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %112:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %87:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL undef %112:sgpr_128
- ; CHECK-NEXT: KILL undef %87:sgpr_128
+ ; CHECK-NEXT: KILL undef %89:sgpr_128
+ ; CHECK-NEXT: KILL undef %118:sgpr_128
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %148:sreg_32, 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %148:sreg_32, implicit-def $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.77, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.83, addrspace 4)
- ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1
+ ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4)
; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1
- ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %148:sreg_32, implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %279:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %334:sgpr_128, undef %335:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %345:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.95, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.100, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.105, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %329:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %340:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
- ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %361:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 224, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.133, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 576, 0 :: (invariant load (s128) from %ir.138, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.122, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 224, 0 :: (invariant load (s128) from %ir.128, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc
@@ -133,49 +135,49 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_17]], 168, 0 :: (invariant load (s32) from %ir.260, align 8, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.158, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4)
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.166, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.171, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s64) from %ir.269, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.193, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.199, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4)
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.204, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.209, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4)
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
@@ -187,30 +189,30 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.280, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 160, 0 :: (invariant load (s128) from %ir.244, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %443:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
- ; CHECK-NEXT: KILL [[S_ADD_U32_15]].sub0, [[S_ADD_U32_15]].sub1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
+ ; CHECK-NEXT: KILL undef %470:sreg_64
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
- ; CHECK-NEXT: KILL undef %443:sreg_64
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.252, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s32) from %ir.291, align 8, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
@@ -222,22 +224,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
- ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 96, 0 :: (invariant load (s128) from %ir.309, addrspace 4)
- ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.315, addrspace 4)
- ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.321, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
@@ -349,13 +351,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
- ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %516:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %530:vgpr_32, undef %532:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
+ ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
; CHECK-NEXT: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index ea9bb0417dfa..862e2dd2de05 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -8,12 +8,11 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
-; GCN-NEXT: s_sub_u32 s10, 0, s8
-; GCN-NEXT: s_subb_u32 s11, 0, s9
+; GCN-NEXT: s_sub_u32 s0, 0, s8
+; GCN-NEXT: s_subb_u32 s1, 0, s9
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -22,69 +21,65 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s10, v0
-; GCN-NEXT: v_readfirstlane_b32 s12, v1
-; GCN-NEXT: v_readfirstlane_b32 s0, v0
-; GCN-NEXT: s_mul_i32 s1, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_mul_i32 s13, s11, s0
-; GCN-NEXT: s_mul_i32 s14, s10, s0
-; GCN-NEXT: s_add_i32 s1, s15, s1
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s14
-; GCN-NEXT: s_add_i32 s1, s1, s13
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s1
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s14
-; GCN-NEXT: v_readfirstlane_b32 s13, v3
-; GCN-NEXT: s_mul_i32 s15, s0, s1
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s1
-; GCN-NEXT: s_add_u32 s13, s13, s15
+; GCN-NEXT: v_mul_hi_u32 v2, s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s10, v1
+; GCN-NEXT: v_readfirstlane_b32 s2, v0
+; GCN-NEXT: s_mul_i32 s11, s0, s10
+; GCN-NEXT: v_readfirstlane_b32 s14, v2
+; GCN-NEXT: s_mul_i32 s12, s1, s2
+; GCN-NEXT: s_mul_i32 s13, s0, s2
+; GCN-NEXT: s_add_i32 s11, s14, s11
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s13
+; GCN-NEXT: s_add_i32 s11, s11, s12
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s11
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s13
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_mul_i32 s15, s2, s11
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s11
+; GCN-NEXT: s_add_u32 s12, s12, s15
; GCN-NEXT: v_readfirstlane_b32 s15, v0
-; GCN-NEXT: s_mul_i32 s14, s12, s14
+; GCN-NEXT: s_mul_i32 s13, s10, s13
; GCN-NEXT: s_addc_u32 s15, 0, s15
-; GCN-NEXT: v_readfirstlane_b32 s16, v4
-; GCN-NEXT: s_add_u32 s13, s13, s14
-; GCN-NEXT: s_addc_u32 s13, s15, s16
-; GCN-NEXT: v_readfirstlane_b32 s14, v1
-; GCN-NEXT: s_addc_u32 s14, s14, 0
-; GCN-NEXT: s_mul_i32 s1, s12, s1
-; GCN-NEXT: s_add_u32 s1, s13, s1
-; GCN-NEXT: s_addc_u32 s13, 0, s14
-; GCN-NEXT: s_add_u32 s14, s0, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s14
-; GCN-NEXT: v_mul_hi_u32 v0, s10, v0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s12, s12, s13
-; GCN-NEXT: s_mul_i32 s0, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s1, v0
-; GCN-NEXT: s_add_i32 s0, s1, s0
-; GCN-NEXT: s_mul_i32 s11, s11, s14
-; GCN-NEXT: s_mul_i32 s1, s10, s14
-; GCN-NEXT: s_add_i32 s0, s0, s11
-; GCN-NEXT: v_mov_b32_e32 v2, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: v_mul_hi_u32 v3, s12, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s14, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s12, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s14, v0
-; GCN-NEXT: s_mul_i32 s11, s14, s0
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_add_u32 s11, s15, s11
+; GCN-NEXT: v_readfirstlane_b32 s14, v4
+; GCN-NEXT: s_add_u32 s12, s12, s13
+; GCN-NEXT: s_addc_u32 s12, s15, s14
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s13, s13, 0
+; GCN-NEXT: s_mul_i32 s11, s10, s11
+; GCN-NEXT: s_add_u32 s11, s12, s11
+; GCN-NEXT: s_addc_u32 s12, 0, s13
+; GCN-NEXT: s_add_u32 s11, s2, s11
+; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: v_mul_hi_u32 v0, s0, v0
+; GCN-NEXT: s_addc_u32 s10, s10, s12
+; GCN-NEXT: s_mul_i32 s12, s0, s10
+; GCN-NEXT: s_mul_i32 s1, s1, s11
; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s1, s12, s1
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: s_add_u32 s1, s11, s1
-; GCN-NEXT: s_addc_u32 s1, s13, s10
-; GCN-NEXT: v_readfirstlane_b32 s10, v1
-; GCN-NEXT: s_addc_u32 s10, s10, 0
-; GCN-NEXT: s_mul_i32 s0, s12, s0
-; GCN-NEXT: s_add_u32 s0, s1, s0
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: s_add_u32 s11, s14, s0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s1, s12, s10
+; GCN-NEXT: s_add_i32 s12, s13, s12
+; GCN-NEXT: s_mul_i32 s0, s0, s11
+; GCN-NEXT: s_add_i32 s1, s12, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: v_mov_b32_e32 v0, s1
+; GCN-NEXT: v_mul_hi_u32 v3, s10, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s10, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
+; GCN-NEXT: s_mul_i32 s13, s11, s1
+; GCN-NEXT: v_readfirstlane_b32 s15, v2
+; GCN-NEXT: s_add_u32 s13, s15, s13
+; GCN-NEXT: v_readfirstlane_b32 s14, v0
+; GCN-NEXT: s_mul_i32 s0, s10, s0
+; GCN-NEXT: s_addc_u32 s14, 0, s14
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_add_u32 s0, s13, s0
+; GCN-NEXT: s_addc_u32 s0, s14, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v1
+; GCN-NEXT: s_addc_u32 s12, s12, 0
+; GCN-NEXT: s_mul_i32 s1, s10, s1
+; GCN-NEXT: s_add_u32 s0, s0, s1
+; GCN-NEXT: s_addc_u32 s1, 0, s12
+; GCN-NEXT: s_add_u32 s11, s11, s0
+; GCN-NEXT: s_addc_u32 s1, s10, s1
; GCN-NEXT: v_mov_b32_e32 v0, s1
; GCN-NEXT: v_mul_hi_u32 v1, s6, v0
; GCN-NEXT: v_mov_b32_e32 v2, s11
@@ -118,11 +113,9 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_mul_i32 s4, s8, s4
; GCN-NEXT: s_sub_u32 s6, s6, s4
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s11, s4, s5
; GCN-NEXT: s_subb_u32 s13, s10, s9
; GCN-NEXT: s_sub_u32 s14, s6, s8
; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s15, s10, s11
; GCN-NEXT: s_subb_u32 s15, s13, 0
; GCN-NEXT: s_cmp_ge_u32 s15, s9
; GCN-NEXT: s_cselect_b32 s16, -1, 0
@@ -131,13 +124,11 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_cmp_eq_u32 s15, s9
; GCN-NEXT: s_cselect_b32 s16, s17, s16
; GCN-NEXT: s_or_b32 s10, s10, s11
-; GCN-NEXT: s_subb_u32 s13, s13, s9
-; GCN-NEXT: s_sub_u32 s17, s14, s8
-; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s10, s10, s11
-; GCN-NEXT: s_subb_u32 s10, s13, 0
+; GCN-NEXT: s_subb_u32 s10, s13, s9
+; GCN-NEXT: s_sub_u32 s11, s14, s8
+; GCN-NEXT: s_subb_u32 s10, s10, 0
; GCN-NEXT: s_cmp_lg_u32 s16, 0
-; GCN-NEXT: s_cselect_b32 s11, s17, s14
+; GCN-NEXT: s_cselect_b32 s11, s11, s14
; GCN-NEXT: s_cselect_b32 s10, s10, s15
; GCN-NEXT: s_or_b32 s4, s4, s5
; GCN-NEXT: s_subb_u32 s4, s7, s12
@@ -150,6 +141,7 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_cmp_lg_u32 s5, 0
; GCN-NEXT: s_cselect_b32 s4, s10, s4
; GCN-NEXT: s_cselect_b32 s5, s11, s6
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v0, s5
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -180,8 +172,6 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s14, s12, 1
-; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-IR-NEXT: s_or_b32 s8, s8, s9
; GCN-IR-NEXT: s_addc_u32 s8, s13, 0
; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
; GCN-IR-NEXT: s_sub_i32 s12, 63, s12
@@ -213,8 +203,6 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-IR-NEXT: s_sub_u32 s12, s12, s18
; GCN-IR-NEXT: s_subb_u32 s13, s13, s19
; GCN-IR-NEXT: s_add_u32 s16, s16, 1
-; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
-; GCN-IR-NEXT: s_or_b32 s18, s18, s19
; GCN-IR-NEXT: s_addc_u32 s17, s17, 0
; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
@@ -968,81 +956,76 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s4
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s5
-; GCN-NEXT: s_sub_u32 s10, 0, s4
-; GCN-NEXT: s_subb_u32 s11, 0, s5
+; GCN-NEXT: s_sub_u32 s8, 0, s4
+; GCN-NEXT: s_subb_u32 s9, 0, s5
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
-; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s10, v0
-; GCN-NEXT: v_readfirstlane_b32 s12, v1
-; GCN-NEXT: v_readfirstlane_b32 s8, v0
-; GCN-NEXT: s_mul_i32 s9, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_mul_i32 s13, s11, s8
-; GCN-NEXT: s_mul_i32 s14, s10, s8
-; GCN-NEXT: s_add_i32 s9, s15, s9
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s14
-; GCN-NEXT: s_add_i32 s9, s9, s13
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s9
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s14
-; GCN-NEXT: v_readfirstlane_b32 s13, v3
-; GCN-NEXT: s_mul_i32 s15, s8, s9
-; GCN-NEXT: s_add_u32 s13, s13, s15
-; GCN-NEXT: v_readfirstlane_b32 s15, v0
-; GCN-NEXT: v_mul_hi_u32 v0, v1, s9
-; GCN-NEXT: s_addc_u32 s15, 0, s15
-; GCN-NEXT: s_mul_i32 s14, s12, s14
-; GCN-NEXT: v_readfirstlane_b32 s16, v4
-; GCN-NEXT: s_add_u32 s13, s13, s14
-; GCN-NEXT: s_addc_u32 s13, s15, s16
+; GCN-NEXT: v_mul_hi_u32 v2, s8, v0
+; GCN-NEXT: v_readfirstlane_b32 s10, v1
+; GCN-NEXT: v_readfirstlane_b32 s2, v0
+; GCN-NEXT: s_mul_i32 s11, s8, s10
+; GCN-NEXT: v_readfirstlane_b32 s14, v2
+; GCN-NEXT: s_mul_i32 s12, s9, s2
+; GCN-NEXT: s_mul_i32 s13, s8, s2
+; GCN-NEXT: s_add_i32 s11, s14, s11
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s13
+; GCN-NEXT: s_add_i32 s11, s11, s12
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s11
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s13
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_mul_i32 s14, s2, s11
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s11
+; GCN-NEXT: s_add_u32 s12, s12, s14
; GCN-NEXT: v_readfirstlane_b32 s14, v0
-; GCN-NEXT: s_addc_u32 s14, s14, 0
-; GCN-NEXT: s_mul_i32 s9, s12, s9
-; GCN-NEXT: s_add_u32 s9, s13, s9
-; GCN-NEXT: s_addc_u32 s13, 0, s14
-; GCN-NEXT: s_add_u32 s14, s8, s9
-; GCN-NEXT: v_mov_b32_e32 v0, s14
-; GCN-NEXT: v_mul_hi_u32 v0, s10, v0
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_addc_u32 s12, s12, s13
-; GCN-NEXT: s_mul_i32 s8, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s9, v0
-; GCN-NEXT: s_add_i32 s8, s9, s8
-; GCN-NEXT: s_mul_i32 s11, s11, s14
-; GCN-NEXT: s_mul_i32 s9, s10, s14
-; GCN-NEXT: s_add_i32 s8, s8, s11
-; GCN-NEXT: v_mov_b32_e32 v2, s9
-; GCN-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NEXT: v_mul_hi_u32 v3, s12, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s14, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s12, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s14, v0
-; GCN-NEXT: s_mul_i32 s11, s14, s8
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_add_u32 s11, s15, s11
+; GCN-NEXT: s_addc_u32 s14, 0, s14
+; GCN-NEXT: s_mul_i32 s13, s10, s13
+; GCN-NEXT: v_readfirstlane_b32 s15, v4
+; GCN-NEXT: s_add_u32 s12, s12, s13
+; GCN-NEXT: s_addc_u32 s12, s14, s15
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s13, s13, 0
+; GCN-NEXT: s_mul_i32 s11, s10, s11
+; GCN-NEXT: s_add_u32 s11, s12, s11
+; GCN-NEXT: s_addc_u32 s12, 0, s13
+; GCN-NEXT: s_add_u32 s11, s2, s11
+; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: v_mul_hi_u32 v0, s8, v0
+; GCN-NEXT: s_addc_u32 s10, s10, s12
+; GCN-NEXT: s_mul_i32 s12, s8, s10
+; GCN-NEXT: s_mul_i32 s9, s9, s11
; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s9, s12, s9
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: s_add_u32 s9, s11, s9
-; GCN-NEXT: s_addc_u32 s9, s13, s10
-; GCN-NEXT: v_readfirstlane_b32 s10, v1
-; GCN-NEXT: s_addc_u32 s10, s10, 0
-; GCN-NEXT: s_mul_i32 s8, s12, s8
-; GCN-NEXT: s_add_u32 s8, s9, s8
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: s_add_u32 s11, s14, s8
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_addc_u32 s10, s12, s10
+; GCN-NEXT: s_add_i32 s12, s13, s12
+; GCN-NEXT: s_mul_i32 s8, s8, s11
+; GCN-NEXT: s_add_i32 s9, s12, s9
+; GCN-NEXT: v_mov_b32_e32 v2, s8
+; GCN-NEXT: v_mov_b32_e32 v0, s9
+; GCN-NEXT: v_mul_hi_u32 v3, s10, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s10, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
+; GCN-NEXT: s_mul_i32 s13, s11, s9
+; GCN-NEXT: v_readfirstlane_b32 s15, v2
+; GCN-NEXT: s_add_u32 s13, s15, s13
+; GCN-NEXT: v_readfirstlane_b32 s14, v0
+; GCN-NEXT: s_mul_i32 s8, s10, s8
+; GCN-NEXT: s_addc_u32 s14, 0, s14
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_add_u32 s8, s13, s8
+; GCN-NEXT: s_addc_u32 s8, s14, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v1
+; GCN-NEXT: s_addc_u32 s12, s12, 0
+; GCN-NEXT: s_mul_i32 s9, s10, s9
+; GCN-NEXT: s_add_u32 s8, s8, s9
+; GCN-NEXT: s_addc_u32 s9, 0, s12
+; GCN-NEXT: s_add_u32 s11, s11, s8
+; GCN-NEXT: s_addc_u32 s10, s10, s9
; GCN-NEXT: s_ashr_i32 s8, s7, 31
; GCN-NEXT: s_add_u32 s6, s6, s8
; GCN-NEXT: s_mov_b32 s9, s8
@@ -1071,6 +1054,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_mul_hi_u32 v0, s4, v0
; GCN-NEXT: s_addc_u32 s11, 0, s12
; GCN-NEXT: s_mul_i32 s11, s4, s11
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_readfirstlane_b32 s12, v0
; GCN-NEXT: s_add_i32 s11, s12, s11
; GCN-NEXT: s_mul_i32 s12, s5, s10
@@ -1079,11 +1063,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_mul_i32 s10, s4, s10
; GCN-NEXT: s_sub_u32 s6, s6, s10
; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s13, s10, s11
; GCN-NEXT: s_subb_u32 s15, s12, s5
; GCN-NEXT: s_sub_u32 s16, s6, s4
; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT: s_or_b32 s17, s12, s13
; GCN-NEXT: s_subb_u32 s17, s15, 0
; GCN-NEXT: s_cmp_ge_u32 s17, s5
; GCN-NEXT: s_cselect_b32 s18, -1, 0
@@ -1092,13 +1074,11 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_cmp_eq_u32 s17, s5
; GCN-NEXT: s_cselect_b32 s18, s19, s18
; GCN-NEXT: s_or_b32 s12, s12, s13
-; GCN-NEXT: s_subb_u32 s15, s15, s5
-; GCN-NEXT: s_sub_u32 s19, s16, s4
-; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT: s_or_b32 s12, s12, s13
-; GCN-NEXT: s_subb_u32 s12, s15, 0
+; GCN-NEXT: s_subb_u32 s12, s15, s5
+; GCN-NEXT: s_sub_u32 s13, s16, s4
+; GCN-NEXT: s_subb_u32 s12, s12, 0
; GCN-NEXT: s_cmp_lg_u32 s18, 0
-; GCN-NEXT: s_cselect_b32 s13, s19, s16
+; GCN-NEXT: s_cselect_b32 s13, s13, s16
; GCN-NEXT: s_cselect_b32 s12, s12, s17
; GCN-NEXT: s_or_b32 s10, s10, s11
; GCN-NEXT: s_subb_u32 s7, s7, s14
@@ -1156,8 +1136,6 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s16, s14, 1
-; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-IR-NEXT: s_or_b32 s10, s10, s11
; GCN-IR-NEXT: s_addc_u32 s10, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
; GCN-IR-NEXT: s_sub_i32 s14, 63, s14
@@ -1189,8 +1167,6 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-IR-NEXT: s_sub_u32 s14, s14, s20
; GCN-IR-NEXT: s_subb_u32 s15, s15, s21
; GCN-IR-NEXT: s_add_u32 s18, s18, 1
-; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0
-; GCN-IR-NEXT: s_or_b32 s20, s20, s21
; GCN-IR-NEXT: s_addc_u32 s19, s19, 0
; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[12:13], s[2:3]
@@ -1316,8 +1292,7 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s4
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s5
; GCN-NEXT: s_sub_u32 s2, 0, s4
-; GCN-NEXT: s_subb_u32 s8, 0, s5
-; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_subb_u32 s6, 0, s5
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -1327,72 +1302,68 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_mul_hi_u32 v2, s2, v0
-; GCN-NEXT: v_readfirstlane_b32 s9, v1
-; GCN-NEXT: v_readfirstlane_b32 s6, v0
-; GCN-NEXT: s_mul_i32 s7, s2, s9
-; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_mul_i32 s10, s8, s6
-; GCN-NEXT: s_mul_i32 s11, s2, s6
-; GCN-NEXT: s_add_i32 s7, s12, s7
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s11
-; GCN-NEXT: s_add_i32 s7, s7, s10
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s7
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s11
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: s_mul_i32 s13, s6, s7
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s7
-; GCN-NEXT: s_add_u32 s10, s10, s13
-; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s11, s9, s11
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s12, v4
-; GCN-NEXT: s_add_u32 s10, s10, s11
-; GCN-NEXT: s_addc_u32 s10, s13, s12
-; GCN-NEXT: v_readfirstlane_b32 s11, v1
-; GCN-NEXT: s_addc_u32 s11, s11, 0
-; GCN-NEXT: s_mul_i32 s7, s9, s7
-; GCN-NEXT: s_add_u32 s7, s10, s7
-; GCN-NEXT: s_addc_u32 s10, 0, s11
-; GCN-NEXT: s_add_u32 s11, s6, s7
-; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: v_readfirstlane_b32 s7, v1
+; GCN-NEXT: v_readfirstlane_b32 s3, v0
+; GCN-NEXT: s_mul_i32 s8, s2, s7
+; GCN-NEXT: v_readfirstlane_b32 s11, v2
+; GCN-NEXT: s_mul_i32 s9, s6, s3
+; GCN-NEXT: s_mul_i32 s10, s2, s3
+; GCN-NEXT: s_add_i32 s8, s11, s8
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s10
+; GCN-NEXT: s_add_i32 s8, s8, s9
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s8
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s10
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s8
+; GCN-NEXT: s_mul_i32 s12, s3, s8
+; GCN-NEXT: s_add_u32 s9, s9, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v0
+; GCN-NEXT: s_mul_i32 s10, s7, s10
+; GCN-NEXT: s_addc_u32 s12, 0, s12
+; GCN-NEXT: v_readfirstlane_b32 s11, v4
+; GCN-NEXT: s_add_u32 s9, s9, s10
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s9, s12, s11
+; GCN-NEXT: s_addc_u32 s10, s13, 0
+; GCN-NEXT: s_mul_i32 s8, s7, s8
+; GCN-NEXT: s_add_u32 s8, s9, s8
+; GCN-NEXT: s_addc_u32 s9, 0, s10
+; GCN-NEXT: s_add_u32 s8, s3, s8
+; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mul_hi_u32 v0, s2, v0
-; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-NEXT: s_or_b32 s6, s6, s7
-; GCN-NEXT: s_addc_u32 s9, s9, s10
-; GCN-NEXT: s_mul_i32 s6, s2, s9
-; GCN-NEXT: v_readfirstlane_b32 s7, v0
-; GCN-NEXT: s_add_i32 s6, s7, s6
-; GCN-NEXT: s_mul_i32 s8, s8, s11
-; GCN-NEXT: s_mul_i32 s2, s2, s11
-; GCN-NEXT: s_add_i32 s6, s6, s8
+; GCN-NEXT: s_addc_u32 s7, s7, s9
+; GCN-NEXT: s_mul_i32 s9, s2, s7
+; GCN-NEXT: s_mul_i32 s6, s6, s8
+; GCN-NEXT: v_readfirstlane_b32 s10, v0
+; GCN-NEXT: s_add_i32 s9, s10, s9
+; GCN-NEXT: s_mul_i32 s2, s2, s8
+; GCN-NEXT: s_add_i32 s6, s9, s6
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v0, s6
-; GCN-NEXT: v_mul_hi_u32 v3, s9, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s9, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
-; GCN-NEXT: s_mul_i32 s8, s11, s6
+; GCN-NEXT: v_mul_hi_u32 v3, s7, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s8, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s7, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s8, v0
+; GCN-NEXT: s_mul_i32 s10, s8, s6
; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_add_u32 s8, s12, s8
-; GCN-NEXT: v_readfirstlane_b32 s10, v0
-; GCN-NEXT: s_mul_i32 s2, s9, s2
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: v_readfirstlane_b32 s7, v3
-; GCN-NEXT: s_add_u32 s2, s8, s2
-; GCN-NEXT: s_addc_u32 s2, s10, s7
-; GCN-NEXT: v_readfirstlane_b32 s7, v1
-; GCN-NEXT: s_addc_u32 s7, s7, 0
-; GCN-NEXT: s_mul_i32 s6, s9, s6
+; GCN-NEXT: s_add_u32 s10, s12, s10
+; GCN-NEXT: v_readfirstlane_b32 s11, v0
+; GCN-NEXT: s_mul_i32 s2, s7, s2
+; GCN-NEXT: s_addc_u32 s11, 0, s11
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: s_add_u32 s2, s10, s2
+; GCN-NEXT: s_addc_u32 s2, s11, s9
+; GCN-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NEXT: s_addc_u32 s9, s9, 0
+; GCN-NEXT: s_mul_i32 s6, s7, s6
; GCN-NEXT: s_add_u32 s2, s2, s6
-; GCN-NEXT: s_addc_u32 s8, 0, s7
-; GCN-NEXT: s_add_u32 s2, s11, s2
-; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-NEXT: s_or_b32 s6, s6, s7
-; GCN-NEXT: s_addc_u32 s6, s9, s8
+; GCN-NEXT: s_addc_u32 s6, 0, s9
+; GCN-NEXT: s_add_u32 s2, s8, s2
+; GCN-NEXT: s_addc_u32 s6, s7, s6
; GCN-NEXT: v_mul_hi_u32 v1, s2, 24
; GCN-NEXT: v_mul_hi_u32 v0, s6, 24
; GCN-NEXT: s_mul_i32 s6, s6, 24
-; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: v_readfirstlane_b32 s8, v1
; GCN-NEXT: v_readfirstlane_b32 s7, v0
; GCN-NEXT: s_add_u32 s6, s8, s6
@@ -1401,16 +1372,15 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_mul_hi_u32 v0, s4, v0
; GCN-NEXT: s_mul_i32 s7, s5, s6
; GCN-NEXT: s_mul_i32 s6, s4, s6
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_readfirstlane_b32 s8, v0
; GCN-NEXT: s_add_i32 s10, s8, s7
; GCN-NEXT: s_sub_i32 s8, 0, s10
; GCN-NEXT: s_sub_u32 s11, 24, s6
; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-NEXT: s_or_b32 s9, s6, s7
; GCN-NEXT: s_subb_u32 s12, s8, s5
; GCN-NEXT: s_sub_u32 s13, s11, s4
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s14, s8, s9
; GCN-NEXT: s_subb_u32 s14, s12, 0
; GCN-NEXT: s_cmp_ge_u32 s14, s5
; GCN-NEXT: s_cselect_b32 s15, -1, 0
@@ -1419,13 +1389,11 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_cmp_eq_u32 s14, s5
; GCN-NEXT: s_cselect_b32 s15, s16, s15
; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s12, s12, s5
-; GCN-NEXT: s_sub_u32 s16, s13, s4
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s8, s12, 0
+; GCN-NEXT: s_subb_u32 s8, s12, s5
+; GCN-NEXT: s_sub_u32 s9, s13, s4
+; GCN-NEXT: s_subb_u32 s8, s8, 0
; GCN-NEXT: s_cmp_lg_u32 s15, 0
-; GCN-NEXT: s_cselect_b32 s9, s16, s13
+; GCN-NEXT: s_cselect_b32 s9, s9, s13
; GCN-NEXT: s_cselect_b32 s8, s8, s14
; GCN-NEXT: s_or_b32 s6, s6, s7
; GCN-NEXT: s_subb_u32 s6, 0, s10
@@ -1468,8 +1436,6 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s8, s2, 1
-; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-IR-NEXT: s_or_b32 s9, s10, s11
; GCN-IR-NEXT: s_addc_u32 s3, s3, 0
; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
; GCN-IR-NEXT: s_sub_i32 s2, 63, s2
@@ -1500,8 +1466,6 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s10, s10, s16
; GCN-IR-NEXT: s_subb_u32 s11, s11, s17
; GCN-IR-NEXT: s_add_u32 s14, s14, 1
-; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT: s_or_b32 s16, s16, s17
; GCN-IR-NEXT: s_addc_u32 s15, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[8:9], s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index d80ec6bd3494..8f8e2c0ba52f 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -655,7 +655,7 @@ bb:
br label %bb5
bb5: ; preds = %bb5.backedge, %bb
- %tmp4.i.sroa.0.0 = phi <9 x double> [ undef, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ]
+ %tmp4.i.sroa.0.0 = phi <9 x double> [ poison, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ]
%tmp14.1.i = load i32, ptr inttoptr (i64 128 to ptr), align 128
store i32 0, ptr addrspace(5) null, align 4
%tmp14.2.i = load i32, ptr inttoptr (i64 128 to ptr), align 128
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
new file mode 100644
index 000000000000..696962a88c8b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GCN %s
+
+# Exercise very basic handling of BUNDLE'd instructions by the two-address-instruction pass.
+
+# This test is an example where it is best to keep the two-address instruction
+# and resolve the tie with a COPY that is expected to be coalesced.
+---
+name: test_fmac_bundle
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: test_fmac_bundle
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GCN-NEXT: BUNDLE implicit-def [[COPY2]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY2]](tied-def 0), implicit $mode, implicit $exec {
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY2]], implicit $mode, implicit $exec
+ ; GCN-NEXT: }
+ %10:vgpr_32 = COPY $vgpr0
+ %11:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_ADD_U32_e64 %10, %11, 0, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit killed %2(tied-def 0), implicit $mode, implicit $exec {
+ %3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit $mode, implicit $exec
+ }
+
+...
+
+# This test is an example where conversion to three-address form would be beneficial.
+---
+name: test_fmac_reuse_bundle
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: test_fmac_reuse_bundle
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
+ ; GCN-NEXT: }
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
+ %2:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ BUNDLE implicit-def %3:vgpr_32, implicit %0, implicit %1, implicit %2(tied-def 0), implicit $mode, implicit $exec {
+ %3:vgpr_32 = V_FMAC_F32_e32 killed %0, killed %1, killed %2, implicit $mode, implicit $exec
+ }
+ %4:vgpr_32 = V_ADD_U32_e64 %3, %2, 0, implicit $exec
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index bdd22f25e91c..b000fae124ed 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -15,10 +15,8 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_add_u32 s2, s2, s8
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
-; SI-NEXT: s_or_b32 s0, s0, s1
; SI-NEXT: s_addc_u32 s3, s3, s9
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: v_mov_b32_e32 v1, s3
@@ -433,8 +431,6 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_u32 s4, s4, s6
-; SI-NEXT: s_cselect_b64 s[12:13], -1, 0
-; SI-NEXT: s_or_b32 s6, s12, s13
; SI-NEXT: s_addc_u32 s5, s5, s7
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index fd461ac80ea5..775483c040b7 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -146,8 +146,6 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s14, s12, 1
-; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-IR-NEXT: s_or_b32 s8, s8, s9
; GCN-IR-NEXT: s_addc_u32 s8, s13, 0
; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
; GCN-IR-NEXT: s_sub_i32 s12, 63, s12
@@ -179,8 +177,6 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-IR-NEXT: s_sub_u32 s12, s12, s16
; GCN-IR-NEXT: s_subb_u32 s13, s13, s17
; GCN-IR-NEXT: s_add_u32 s10, s10, 1
-; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT: s_or_b32 s16, s16, s17
; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[2:3], s[4:5]
@@ -786,12 +782,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-LABEL: s_test_udiv_k_num_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
-; GCN-NEXT: s_sub_u32 s6, 0, s2
-; GCN-NEXT: s_subb_u32 s8, 0, s3
+; GCN-NEXT: s_sub_u32 s4, 0, s2
+; GCN-NEXT: s_subb_u32 s5, 0, s3
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -800,118 +795,112 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s6, v0
+; GCN-NEXT: v_mul_hi_u32 v2, s4, v0
+; GCN-NEXT: v_readfirstlane_b32 s6, v1
+; GCN-NEXT: v_readfirstlane_b32 s7, v0
+; GCN-NEXT: s_mul_i32 s8, s4, s6
+; GCN-NEXT: v_readfirstlane_b32 s11, v2
+; GCN-NEXT: s_mul_i32 s9, s5, s7
+; GCN-NEXT: s_mul_i32 s10, s4, s7
+; GCN-NEXT: s_add_i32 s8, s11, s8
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s10
+; GCN-NEXT: s_add_i32 s8, s8, s9
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s8
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s10
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: s_mul_i32 s12, s7, s8
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s8
+; GCN-NEXT: s_add_u32 s9, s9, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v0
+; GCN-NEXT: s_mul_i32 s10, s6, s10
+; GCN-NEXT: s_addc_u32 s12, 0, s12
+; GCN-NEXT: v_readfirstlane_b32 s11, v4
+; GCN-NEXT: s_add_u32 s9, s9, s10
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s9, s12, s11
+; GCN-NEXT: s_mul_i32 s8, s6, s8
+; GCN-NEXT: s_addc_u32 s10, s13, 0
+; GCN-NEXT: s_add_u32 s8, s9, s8
+; GCN-NEXT: s_addc_u32 s9, 0, s10
+; GCN-NEXT: s_add_u32 s8, s7, s8
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mul_hi_u32 v0, s4, v0
+; GCN-NEXT: s_addc_u32 s6, s6, s9
+; GCN-NEXT: s_mul_i32 s9, s4, s6
+; GCN-NEXT: s_mul_i32 s5, s5, s8
+; GCN-NEXT: v_readfirstlane_b32 s10, v0
+; GCN-NEXT: s_add_i32 s9, s10, s9
+; GCN-NEXT: s_mul_i32 s4, s4, s8
+; GCN-NEXT: s_add_i32 s5, s9, s5
+; GCN-NEXT: v_mov_b32_e32 v2, s4
+; GCN-NEXT: v_mov_b32_e32 v0, s5
+; GCN-NEXT: v_mul_hi_u32 v3, s6, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s8, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s6, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s8, v0
+; GCN-NEXT: s_mul_i32 s10, s8, s5
+; GCN-NEXT: v_readfirstlane_b32 s12, v2
+; GCN-NEXT: s_add_u32 s10, s12, s10
+; GCN-NEXT: v_readfirstlane_b32 s11, v0
+; GCN-NEXT: s_mul_i32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s11, 0, s11
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: s_add_u32 s4, s10, s4
+; GCN-NEXT: s_addc_u32 s4, s11, s9
; GCN-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NEXT: s_addc_u32 s9, s9, 0
+; GCN-NEXT: s_mul_i32 s5, s6, s5
+; GCN-NEXT: s_add_u32 s4, s4, s5
+; GCN-NEXT: s_addc_u32 s5, 0, s9
+; GCN-NEXT: s_add_u32 s4, s8, s4
+; GCN-NEXT: s_addc_u32 s5, s6, s5
+; GCN-NEXT: v_mul_hi_u32 v1, s4, 24
+; GCN-NEXT: v_mul_hi_u32 v0, s5, 24
+; GCN-NEXT: s_mul_i32 s5, s5, 24
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: v_readfirstlane_b32 s8, v1
; GCN-NEXT: v_readfirstlane_b32 s4, v0
-; GCN-NEXT: s_mul_i32 s5, s6, s9
-; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_mul_i32 s10, s8, s4
-; GCN-NEXT: s_mul_i32 s11, s6, s4
-; GCN-NEXT: s_add_i32 s5, s12, s5
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s11
-; GCN-NEXT: s_add_i32 s5, s5, s10
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s5
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s11
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s5
-; GCN-NEXT: s_mul_i32 s13, s4, s5
-; GCN-NEXT: s_add_u32 s10, s10, s13
-; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s11, s9, s11
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s12, v4
-; GCN-NEXT: s_add_u32 s10, s10, s11
-; GCN-NEXT: v_readfirstlane_b32 s14, v1
-; GCN-NEXT: s_addc_u32 s10, s13, s12
-; GCN-NEXT: s_addc_u32 s11, s14, 0
-; GCN-NEXT: s_mul_i32 s5, s9, s5
-; GCN-NEXT: s_add_u32 s5, s10, s5
-; GCN-NEXT: s_addc_u32 s10, 0, s11
-; GCN-NEXT: s_add_u32 s11, s4, s5
-; GCN-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NEXT: v_mul_hi_u32 v0, s6, v0
-; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s4, s4, s5
-; GCN-NEXT: s_addc_u32 s9, s9, s10
-; GCN-NEXT: s_mul_i32 s4, s6, s9
-; GCN-NEXT: v_readfirstlane_b32 s5, v0
-; GCN-NEXT: s_add_i32 s4, s5, s4
-; GCN-NEXT: s_mul_i32 s8, s8, s11
-; GCN-NEXT: s_mul_i32 s5, s6, s11
-; GCN-NEXT: s_add_i32 s4, s4, s8
-; GCN-NEXT: v_mov_b32_e32 v2, s5
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: v_mul_hi_u32 v3, s9, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s9, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
-; GCN-NEXT: s_mul_i32 s8, s11, s4
-; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_add_u32 s8, s12, s8
-; GCN-NEXT: v_readfirstlane_b32 s10, v0
-; GCN-NEXT: s_mul_i32 s5, s9, s5
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: v_readfirstlane_b32 s6, v3
; GCN-NEXT: s_add_u32 s5, s8, s5
-; GCN-NEXT: s_addc_u32 s5, s10, s6
-; GCN-NEXT: v_readfirstlane_b32 s6, v1
-; GCN-NEXT: s_addc_u32 s6, s6, 0
-; GCN-NEXT: s_mul_i32 s4, s9, s4
-; GCN-NEXT: s_add_u32 s4, s5, s4
-; GCN-NEXT: s_addc_u32 s6, 0, s6
-; GCN-NEXT: s_add_u32 s8, s11, s4
-; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s4, s4, s5
-; GCN-NEXT: s_addc_u32 s4, s9, s6
-; GCN-NEXT: v_mul_hi_u32 v1, s8, 24
-; GCN-NEXT: v_mul_hi_u32 v0, s4, 24
-; GCN-NEXT: s_mul_i32 s4, s4, 24
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: v_readfirstlane_b32 s8, v1
-; GCN-NEXT: v_readfirstlane_b32 s5, v0
-; GCN-NEXT: s_add_u32 s4, s8, s4
-; GCN-NEXT: s_addc_u32 s10, 0, s5
-; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_addc_u32 s8, 0, s4
+; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mul_hi_u32 v0, s2, v0
; GCN-NEXT: s_mov_b32 s4, s0
; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: s_mul_i32 s0, s3, s10
+; GCN-NEXT: s_mul_i32 s0, s3, s8
; GCN-NEXT: v_readfirstlane_b32 s1, v0
-; GCN-NEXT: s_add_i32 s11, s1, s0
-; GCN-NEXT: s_sub_i32 s8, 0, s11
-; GCN-NEXT: s_mul_i32 s0, s2, s10
-; GCN-NEXT: s_sub_u32 s12, 24, s0
+; GCN-NEXT: s_add_i32 s9, s1, s0
+; GCN-NEXT: s_sub_i32 s10, 0, s9
+; GCN-NEXT: s_mul_i32 s0, s2, s8
+; GCN-NEXT: s_sub_u32 s11, 24, s0
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s9, s0, s1
-; GCN-NEXT: s_subb_u32 s13, s8, s3
-; GCN-NEXT: s_sub_u32 s14, s12, s2
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s8, s13, 0
-; GCN-NEXT: s_cmp_ge_u32 s8, s3
-; GCN-NEXT: s_cselect_b32 s9, -1, 0
-; GCN-NEXT: s_cmp_ge_u32 s14, s2
+; GCN-NEXT: s_subb_u32 s10, s10, s3
+; GCN-NEXT: s_sub_u32 s12, s11, s2
+; GCN-NEXT: s_subb_u32 s10, s10, 0
+; GCN-NEXT: s_cmp_ge_u32 s10, s3
; GCN-NEXT: s_cselect_b32 s13, -1, 0
-; GCN-NEXT: s_cmp_eq_u32 s8, s3
-; GCN-NEXT: s_cselect_b32 s8, s13, s9
-; GCN-NEXT: s_add_u32 s9, s10, 1
+; GCN-NEXT: s_cmp_ge_u32 s12, s2
+; GCN-NEXT: s_cselect_b32 s12, -1, 0
+; GCN-NEXT: s_cmp_eq_u32 s10, s3
+; GCN-NEXT: s_cselect_b32 s10, s12, s13
+; GCN-NEXT: s_add_u32 s12, s8, 1
; GCN-NEXT: s_addc_u32 s13, 0, 0
-; GCN-NEXT: s_add_u32 s14, s10, 2
+; GCN-NEXT: s_add_u32 s14, s8, 2
; GCN-NEXT: s_addc_u32 s15, 0, 0
-; GCN-NEXT: s_cmp_lg_u32 s8, 0
-; GCN-NEXT: s_cselect_b32 s8, s14, s9
-; GCN-NEXT: s_cselect_b32 s9, s15, s13
+; GCN-NEXT: s_cmp_lg_u32 s10, 0
+; GCN-NEXT: s_cselect_b32 s10, s14, s12
+; GCN-NEXT: s_cselect_b32 s12, s15, s13
; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_subb_u32 s0, 0, s11
+; GCN-NEXT: s_subb_u32 s0, 0, s9
; GCN-NEXT: s_cmp_ge_u32 s0, s3
; GCN-NEXT: s_cselect_b32 s1, -1, 0
-; GCN-NEXT: s_cmp_ge_u32 s12, s2
+; GCN-NEXT: s_cmp_ge_u32 s11, s2
; GCN-NEXT: s_cselect_b32 s2, -1, 0
; GCN-NEXT: s_cmp_eq_u32 s0, s3
; GCN-NEXT: s_cselect_b32 s0, s2, s1
; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cselect_b32 s0, s9, 0
-; GCN-NEXT: s_cselect_b32 s1, s8, s10
+; GCN-NEXT: s_cselect_b32 s0, s12, 0
+; GCN-NEXT: s_cselect_b32 s1, s10, s8
+; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: v_mov_b32_e32 v0, s1
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -937,8 +926,6 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s10, s8, 1
-; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-IR-NEXT: s_or_b32 s6, s6, s7
; GCN-IR-NEXT: s_addc_u32 s6, s9, 0
; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-IR-NEXT: s_sub_i32 s8, 63, s8
@@ -969,8 +956,6 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s10, s10, s16
; GCN-IR-NEXT: s_subb_u32 s11, s11, s17
; GCN-IR-NEXT: s_add_u32 s14, s14, 1
-; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT: s_or_b32 s16, s16, s17
; GCN-IR-NEXT: s_addc_u32 s15, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5]
@@ -1307,8 +1292,6 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s11, s8, 1
-; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-IR-NEXT: s_or_b32 s6, s6, s7
; GCN-IR-NEXT: s_addc_u32 s6, s9, 0
; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-IR-NEXT: s_sub_i32 s8, 63, s8
@@ -1336,8 +1319,6 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s2, s2, s8
; GCN-IR-NEXT: s_subb_u32 s3, s3, 0
; GCN-IR-NEXT: s_add_u32 s10, s10, 1
-; GCN-IR-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GCN-IR-NEXT: s_or_b32 s12, s12, s13
; GCN-IR-NEXT: s_addc_u32 s11, s11, 0
; GCN-IR-NEXT: s_cselect_b64 s[12:13], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 137dc1fe4229..28e6627b8741 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -8,12 +8,11 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
-; GCN-NEXT: s_sub_u32 s10, 0, s8
-; GCN-NEXT: s_subb_u32 s11, 0, s9
+; GCN-NEXT: s_sub_u32 s0, 0, s8
+; GCN-NEXT: s_subb_u32 s1, 0, s9
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -22,69 +21,65 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s10, v0
-; GCN-NEXT: v_readfirstlane_b32 s12, v1
-; GCN-NEXT: v_readfirstlane_b32 s0, v0
-; GCN-NEXT: s_mul_i32 s1, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_mul_i32 s13, s11, s0
-; GCN-NEXT: s_mul_i32 s14, s10, s0
-; GCN-NEXT: s_add_i32 s1, s15, s1
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s14
-; GCN-NEXT: s_add_i32 s1, s1, s13
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s1
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s14
-; GCN-NEXT: v_readfirstlane_b32 s13, v3
-; GCN-NEXT: s_mul_i32 s15, s0, s1
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s1
-; GCN-NEXT: s_add_u32 s13, s13, s15
+; GCN-NEXT: v_mul_hi_u32 v2, s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s10, v1
+; GCN-NEXT: v_readfirstlane_b32 s2, v0
+; GCN-NEXT: s_mul_i32 s11, s0, s10
+; GCN-NEXT: v_readfirstlane_b32 s14, v2
+; GCN-NEXT: s_mul_i32 s12, s1, s2
+; GCN-NEXT: s_mul_i32 s13, s0, s2
+; GCN-NEXT: s_add_i32 s11, s14, s11
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s13
+; GCN-NEXT: s_add_i32 s11, s11, s12
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s11
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s13
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_mul_i32 s15, s2, s11
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s11
+; GCN-NEXT: s_add_u32 s12, s12, s15
; GCN-NEXT: v_readfirstlane_b32 s15, v0
-; GCN-NEXT: s_mul_i32 s14, s12, s14
+; GCN-NEXT: s_mul_i32 s13, s10, s13
; GCN-NEXT: s_addc_u32 s15, 0, s15
-; GCN-NEXT: v_readfirstlane_b32 s16, v4
-; GCN-NEXT: s_add_u32 s13, s13, s14
-; GCN-NEXT: s_addc_u32 s13, s15, s16
-; GCN-NEXT: v_readfirstlane_b32 s14, v1
-; GCN-NEXT: s_addc_u32 s14, s14, 0
-; GCN-NEXT: s_mul_i32 s1, s12, s1
-; GCN-NEXT: s_add_u32 s1, s13, s1
-; GCN-NEXT: s_addc_u32 s13, 0, s14
-; GCN-NEXT: s_add_u32 s14, s0, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s14
-; GCN-NEXT: v_mul_hi_u32 v0, s10, v0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s12, s12, s13
-; GCN-NEXT: s_mul_i32 s0, s10, s12
-; GCN-NEXT: v_readfirstlane_b32 s1, v0
-; GCN-NEXT: s_add_i32 s0, s1, s0
-; GCN-NEXT: s_mul_i32 s11, s11, s14
-; GCN-NEXT: s_mul_i32 s1, s10, s14
-; GCN-NEXT: s_add_i32 s0, s0, s11
-; GCN-NEXT: v_mov_b32_e32 v2, s1
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: v_mul_hi_u32 v3, s12, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s14, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s12, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s14, v0
-; GCN-NEXT: s_mul_i32 s11, s14, s0
-; GCN-NEXT: v_readfirstlane_b32 s15, v2
-; GCN-NEXT: s_add_u32 s11, s15, s11
+; GCN-NEXT: v_readfirstlane_b32 s14, v4
+; GCN-NEXT: s_add_u32 s12, s12, s13
+; GCN-NEXT: s_addc_u32 s12, s15, s14
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s13, s13, 0
+; GCN-NEXT: s_mul_i32 s11, s10, s11
+; GCN-NEXT: s_add_u32 s11, s12, s11
+; GCN-NEXT: s_addc_u32 s12, 0, s13
+; GCN-NEXT: s_add_u32 s11, s2, s11
+; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: v_mul_hi_u32 v0, s0, v0
+; GCN-NEXT: s_addc_u32 s10, s10, s12
+; GCN-NEXT: s_mul_i32 s12, s0, s10
+; GCN-NEXT: s_mul_i32 s1, s1, s11
; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s1, s12, s1
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: s_add_u32 s1, s11, s1
-; GCN-NEXT: s_addc_u32 s1, s13, s10
-; GCN-NEXT: v_readfirstlane_b32 s10, v1
-; GCN-NEXT: s_addc_u32 s10, s10, 0
-; GCN-NEXT: s_mul_i32 s0, s12, s0
-; GCN-NEXT: s_add_u32 s0, s1, s0
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: s_add_u32 s11, s14, s0
-; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s0, s0, s1
-; GCN-NEXT: s_addc_u32 s1, s12, s10
+; GCN-NEXT: s_add_i32 s12, s13, s12
+; GCN-NEXT: s_mul_i32 s0, s0, s11
+; GCN-NEXT: s_add_i32 s1, s12, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: v_mov_b32_e32 v0, s1
+; GCN-NEXT: v_mul_hi_u32 v3, s10, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s10, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
+; GCN-NEXT: s_mul_i32 s13, s11, s1
+; GCN-NEXT: v_readfirstlane_b32 s15, v2
+; GCN-NEXT: s_add_u32 s13, s15, s13
+; GCN-NEXT: v_readfirstlane_b32 s14, v0
+; GCN-NEXT: s_mul_i32 s0, s10, s0
+; GCN-NEXT: s_addc_u32 s14, 0, s14
+; GCN-NEXT: v_readfirstlane_b32 s12, v3
+; GCN-NEXT: s_add_u32 s0, s13, s0
+; GCN-NEXT: s_addc_u32 s0, s14, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v1
+; GCN-NEXT: s_addc_u32 s12, s12, 0
+; GCN-NEXT: s_mul_i32 s1, s10, s1
+; GCN-NEXT: s_add_u32 s0, s0, s1
+; GCN-NEXT: s_addc_u32 s1, 0, s12
+; GCN-NEXT: s_add_u32 s11, s11, s0
+; GCN-NEXT: s_addc_u32 s1, s10, s1
; GCN-NEXT: v_mov_b32_e32 v0, s1
; GCN-NEXT: v_mul_hi_u32 v1, s6, v0
; GCN-NEXT: v_mov_b32_e32 v2, s11
@@ -118,11 +113,9 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_mul_i32 s4, s8, s4
; GCN-NEXT: s_sub_u32 s6, s6, s4
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s11, s4, s5
; GCN-NEXT: s_subb_u32 s13, s10, s9
; GCN-NEXT: s_sub_u32 s14, s6, s8
; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s15, s10, s11
; GCN-NEXT: s_subb_u32 s15, s13, 0
; GCN-NEXT: s_cmp_ge_u32 s15, s9
; GCN-NEXT: s_cselect_b32 s16, -1, 0
@@ -131,13 +124,11 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_cmp_eq_u32 s15, s9
; GCN-NEXT: s_cselect_b32 s16, s17, s16
; GCN-NEXT: s_or_b32 s10, s10, s11
-; GCN-NEXT: s_subb_u32 s13, s13, s9
-; GCN-NEXT: s_sub_u32 s17, s14, s8
-; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-NEXT: s_or_b32 s10, s10, s11
-; GCN-NEXT: s_subb_u32 s10, s13, 0
+; GCN-NEXT: s_subb_u32 s10, s13, s9
+; GCN-NEXT: s_sub_u32 s11, s14, s8
+; GCN-NEXT: s_subb_u32 s10, s10, 0
; GCN-NEXT: s_cmp_lg_u32 s16, 0
-; GCN-NEXT: s_cselect_b32 s11, s17, s14
+; GCN-NEXT: s_cselect_b32 s11, s11, s14
; GCN-NEXT: s_cselect_b32 s10, s10, s15
; GCN-NEXT: s_or_b32 s4, s4, s5
; GCN-NEXT: s_subb_u32 s4, s7, s12
@@ -150,6 +141,7 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_cmp_lg_u32 s5, 0
; GCN-NEXT: s_cselect_b32 s4, s10, s4
; GCN-NEXT: s_cselect_b32 s5, s11, s6
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v0, s5
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -180,8 +172,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s14, s12, 1
-; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-IR-NEXT: s_or_b32 s8, s8, s9
; GCN-IR-NEXT: s_addc_u32 s8, s13, 0
; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0
; GCN-IR-NEXT: s_sub_i32 s12, 63, s12
@@ -213,8 +203,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-IR-NEXT: s_sub_u32 s12, s12, s18
; GCN-IR-NEXT: s_subb_u32 s13, s13, s19
; GCN-IR-NEXT: s_add_u32 s16, s16, 1
-; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
-; GCN-IR-NEXT: s_or_b32 s18, s18, s19
; GCN-IR-NEXT: s_addc_u32 s17, s17, 0
; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
@@ -803,12 +791,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-LABEL: s_test_urem_k_num_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
-; GCN-NEXT: s_sub_u32 s6, 0, s2
-; GCN-NEXT: s_subb_u32 s8, 0, s3
+; GCN-NEXT: s_sub_u32 s4, 0, s2
+; GCN-NEXT: s_subb_u32 s5, 0, s3
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -817,77 +804,73 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT: v_mul_hi_u32 v2, s6, v0
+; GCN-NEXT: v_mul_hi_u32 v2, s4, v0
+; GCN-NEXT: v_readfirstlane_b32 s6, v1
+; GCN-NEXT: v_readfirstlane_b32 s7, v0
+; GCN-NEXT: s_mul_i32 s8, s4, s6
+; GCN-NEXT: v_readfirstlane_b32 s11, v2
+; GCN-NEXT: s_mul_i32 s9, s5, s7
+; GCN-NEXT: s_mul_i32 s10, s4, s7
+; GCN-NEXT: s_add_i32 s8, s11, s8
+; GCN-NEXT: v_mul_hi_u32 v3, v0, s10
+; GCN-NEXT: s_add_i32 s8, s8, s9
+; GCN-NEXT: v_mul_hi_u32 v0, v0, s8
+; GCN-NEXT: v_mul_hi_u32 v4, v1, s10
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: s_mul_i32 s12, s7, s8
+; GCN-NEXT: v_mul_hi_u32 v1, v1, s8
+; GCN-NEXT: s_add_u32 s9, s9, s12
+; GCN-NEXT: v_readfirstlane_b32 s12, v0
+; GCN-NEXT: s_mul_i32 s10, s6, s10
+; GCN-NEXT: s_addc_u32 s12, 0, s12
+; GCN-NEXT: v_readfirstlane_b32 s11, v4
+; GCN-NEXT: s_add_u32 s9, s9, s10
+; GCN-NEXT: v_readfirstlane_b32 s13, v1
+; GCN-NEXT: s_addc_u32 s9, s12, s11
+; GCN-NEXT: s_mul_i32 s8, s6, s8
+; GCN-NEXT: s_addc_u32 s10, s13, 0
+; GCN-NEXT: s_add_u32 s8, s9, s8
+; GCN-NEXT: s_addc_u32 s9, 0, s10
+; GCN-NEXT: s_add_u32 s8, s7, s8
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mul_hi_u32 v0, s4, v0
+; GCN-NEXT: s_addc_u32 s6, s6, s9
+; GCN-NEXT: s_mul_i32 s9, s4, s6
+; GCN-NEXT: s_mul_i32 s5, s5, s8
+; GCN-NEXT: v_readfirstlane_b32 s10, v0
+; GCN-NEXT: s_add_i32 s9, s10, s9
+; GCN-NEXT: s_mul_i32 s4, s4, s8
+; GCN-NEXT: s_add_i32 s5, s9, s5
+; GCN-NEXT: v_mov_b32_e32 v2, s4
+; GCN-NEXT: v_mov_b32_e32 v0, s5
+; GCN-NEXT: v_mul_hi_u32 v3, s6, v2
+; GCN-NEXT: v_mul_hi_u32 v2, s8, v2
+; GCN-NEXT: v_mul_hi_u32 v1, s6, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s8, v0
+; GCN-NEXT: s_mul_i32 s10, s8, s5
+; GCN-NEXT: v_readfirstlane_b32 s12, v2
+; GCN-NEXT: s_add_u32 s10, s12, s10
+; GCN-NEXT: v_readfirstlane_b32 s11, v0
+; GCN-NEXT: s_mul_i32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s11, 0, s11
+; GCN-NEXT: v_readfirstlane_b32 s9, v3
+; GCN-NEXT: s_add_u32 s4, s10, s4
+; GCN-NEXT: s_addc_u32 s4, s11, s9
; GCN-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NEXT: s_addc_u32 s9, s9, 0
+; GCN-NEXT: s_mul_i32 s5, s6, s5
+; GCN-NEXT: s_add_u32 s4, s4, s5
+; GCN-NEXT: s_addc_u32 s5, 0, s9
+; GCN-NEXT: s_add_u32 s4, s8, s4
+; GCN-NEXT: s_addc_u32 s5, s6, s5
+; GCN-NEXT: v_mul_hi_u32 v1, s4, 24
+; GCN-NEXT: v_mul_hi_u32 v0, s5, 24
+; GCN-NEXT: s_mul_i32 s5, s5, 24
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: v_readfirstlane_b32 s8, v1
; GCN-NEXT: v_readfirstlane_b32 s4, v0
-; GCN-NEXT: s_mul_i32 s5, s6, s9
-; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_mul_i32 s10, s8, s4
-; GCN-NEXT: s_mul_i32 s11, s6, s4
-; GCN-NEXT: s_add_i32 s5, s12, s5
-; GCN-NEXT: v_mul_hi_u32 v3, v0, s11
-; GCN-NEXT: s_add_i32 s5, s5, s10
-; GCN-NEXT: v_mul_hi_u32 v0, v0, s5
-; GCN-NEXT: v_mul_hi_u32 v4, v1, s11
-; GCN-NEXT: v_readfirstlane_b32 s10, v3
-; GCN-NEXT: v_mul_hi_u32 v1, v1, s5
-; GCN-NEXT: s_mul_i32 s13, s4, s5
-; GCN-NEXT: s_add_u32 s10, s10, s13
-; GCN-NEXT: v_readfirstlane_b32 s13, v0
-; GCN-NEXT: s_mul_i32 s11, s9, s11
-; GCN-NEXT: s_addc_u32 s13, 0, s13
-; GCN-NEXT: v_readfirstlane_b32 s12, v4
-; GCN-NEXT: s_add_u32 s10, s10, s11
-; GCN-NEXT: v_readfirstlane_b32 s14, v1
-; GCN-NEXT: s_addc_u32 s10, s13, s12
-; GCN-NEXT: s_addc_u32 s11, s14, 0
-; GCN-NEXT: s_mul_i32 s5, s9, s5
-; GCN-NEXT: s_add_u32 s5, s10, s5
-; GCN-NEXT: s_addc_u32 s10, 0, s11
-; GCN-NEXT: s_add_u32 s11, s4, s5
-; GCN-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NEXT: v_mul_hi_u32 v0, s6, v0
-; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s4, s4, s5
-; GCN-NEXT: s_addc_u32 s9, s9, s10
-; GCN-NEXT: s_mul_i32 s4, s6, s9
-; GCN-NEXT: v_readfirstlane_b32 s5, v0
-; GCN-NEXT: s_add_i32 s4, s5, s4
-; GCN-NEXT: s_mul_i32 s8, s8, s11
-; GCN-NEXT: s_mul_i32 s5, s6, s11
-; GCN-NEXT: s_add_i32 s4, s4, s8
-; GCN-NEXT: v_mov_b32_e32 v2, s5
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: v_mul_hi_u32 v3, s9, v2
-; GCN-NEXT: v_mul_hi_u32 v2, s11, v2
-; GCN-NEXT: v_mul_hi_u32 v1, s9, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
-; GCN-NEXT: s_mul_i32 s8, s11, s4
-; GCN-NEXT: v_readfirstlane_b32 s12, v2
-; GCN-NEXT: s_add_u32 s8, s12, s8
-; GCN-NEXT: v_readfirstlane_b32 s10, v0
-; GCN-NEXT: s_mul_i32 s5, s9, s5
-; GCN-NEXT: s_addc_u32 s10, 0, s10
-; GCN-NEXT: v_readfirstlane_b32 s6, v3
; GCN-NEXT: s_add_u32 s5, s8, s5
-; GCN-NEXT: s_addc_u32 s5, s10, s6
-; GCN-NEXT: v_readfirstlane_b32 s6, v1
-; GCN-NEXT: s_addc_u32 s6, s6, 0
-; GCN-NEXT: s_mul_i32 s4, s9, s4
-; GCN-NEXT: s_add_u32 s4, s5, s4
-; GCN-NEXT: s_addc_u32 s6, 0, s6
-; GCN-NEXT: s_add_u32 s8, s11, s4
-; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT: s_or_b32 s4, s4, s5
-; GCN-NEXT: s_addc_u32 s4, s9, s6
-; GCN-NEXT: v_mul_hi_u32 v1, s8, 24
-; GCN-NEXT: v_mul_hi_u32 v0, s4, 24
-; GCN-NEXT: s_mul_i32 s4, s4, 24
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: v_readfirstlane_b32 s8, v1
-; GCN-NEXT: v_readfirstlane_b32 s5, v0
-; GCN-NEXT: s_add_u32 s4, s8, s4
-; GCN-NEXT: s_addc_u32 s8, 0, s5
+; GCN-NEXT: s_addc_u32 s8, 0, s4
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mul_hi_u32 v0, s2, v0
; GCN-NEXT: s_mov_b32 s4, s0
@@ -899,11 +882,9 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_mul_i32 s0, s2, s8
; GCN-NEXT: s_sub_u32 s11, 24, s0
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT: s_or_b32 s8, s0, s1
; GCN-NEXT: s_subb_u32 s12, s9, s3
; GCN-NEXT: s_sub_u32 s13, s11, s2
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s14, s8, s9
; GCN-NEXT: s_subb_u32 s14, s12, 0
; GCN-NEXT: s_cmp_ge_u32 s14, s3
; GCN-NEXT: s_cselect_b32 s15, -1, 0
@@ -912,13 +893,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_cmp_eq_u32 s14, s3
; GCN-NEXT: s_cselect_b32 s15, s16, s15
; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s12, s12, s3
-; GCN-NEXT: s_sub_u32 s16, s13, s2
-; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT: s_or_b32 s8, s8, s9
-; GCN-NEXT: s_subb_u32 s8, s12, 0
+; GCN-NEXT: s_subb_u32 s8, s12, s3
+; GCN-NEXT: s_sub_u32 s9, s13, s2
+; GCN-NEXT: s_subb_u32 s8, s8, 0
; GCN-NEXT: s_cmp_lg_u32 s15, 0
-; GCN-NEXT: s_cselect_b32 s9, s16, s13
+; GCN-NEXT: s_cselect_b32 s9, s9, s13
; GCN-NEXT: s_cselect_b32 s8, s8, s14
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: s_subb_u32 s0, 0, s10
@@ -931,6 +910,7 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_cmp_lg_u32 s1, 0
; GCN-NEXT: s_cselect_b32 s0, s8, s0
; GCN-NEXT: s_cselect_b32 s1, s9, s11
+; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: v_mov_b32_e32 v0, s1
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -956,8 +936,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s10, s8, 1
-; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-IR-NEXT: s_or_b32 s6, s6, s7
; GCN-IR-NEXT: s_addc_u32 s6, s9, 0
; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-IR-NEXT: s_sub_i32 s8, 63, s8
@@ -988,8 +966,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s10, s10, s16
; GCN-IR-NEXT: s_subb_u32 s11, s11, s17
; GCN-IR-NEXT: s_add_u32 s14, s14, 1
-; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT: s_or_b32 s16, s16, s17
; GCN-IR-NEXT: s_addc_u32 s15, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5]
@@ -1077,8 +1053,6 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: s_add_u32 s11, s8, 1
-; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
-; GCN-IR-NEXT: s_or_b32 s6, s6, s7
; GCN-IR-NEXT: s_addc_u32 s6, s9, 0
; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-IR-NEXT: s_sub_i32 s8, 63, s8
@@ -1106,8 +1080,6 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_sub_u32 s8, s8, s10
; GCN-IR-NEXT: s_subb_u32 s9, s9, 0
; GCN-IR-NEXT: s_add_u32 s12, s12, 1
-; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0
-; GCN-IR-NEXT: s_or_b32 s14, s14, s15
; GCN-IR-NEXT: s_addc_u32 s13, s13, 0
; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0
; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll
index e8db6471b6a4..8a54ad301f48 100644
--- a/llvm/test/CodeGen/AMDGPU/usubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubo.ll
@@ -15,10 +15,8 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_sub_u32 s2, s2, s8
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
-; SI-NEXT: s_or_b32 s0, s0, s1
; SI-NEXT: s_subb_u32 s3, s3, s9
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: v_mov_b32_e32 v1, s3
@@ -432,8 +430,6 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_sub_u32 s4, s4, s6
-; SI-NEXT: s_cselect_b64 s[12:13], -1, 0
-; SI-NEXT: s_or_b32 s6, s12, s13
; SI-NEXT: s_subb_u32 s5, s5, s7
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
diff --git a/llvm/test/CodeGen/DirectX/llvm_assume.ll b/llvm/test/CodeGen/DirectX/llvm_assume.ll
new file mode 100644
index 000000000000..d739592b75d7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/llvm_assume.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define void @test_llvm_assume(i1 %0) {
+; CHECK-LABEL: test_llvm_assume
+; CHECK-NEXT: ret void
+tail call void @llvm.assume(i1 %0)
+ret void
+}
+
diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
index a8557e47b0ea..475935d2eb13 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
@@ -42,3 +42,68 @@ define void @alloca_2d_gep_test() {
%3 = getelementptr inbounds nuw [2 x <2 x i32>], ptr %1, i32 0, i32 %2
ret void
}
+
+; CHECK-LABEL: subtype_array_test
+define void @subtype_array_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw [4 x i32], ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_vector_test
+define void @subtype_vector_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x <4 x i32>], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw <4 x i32>, ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_scalar_test
+define void @subtype_scalar_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i32, ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_i8_test
+define void @subtype_i8_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr [[alloca_val]], i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i8, ptr %arr, i32 %i
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll
new file mode 100644
index 000000000000..ca10f6ece5a8
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [8 x <4 x i32>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+
+; SCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [8 x [4 x i32]] zeroinitializer, align 16
+; FCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [32 x i32] zeroinitializer, align 16
+; CHECK: [[vecData:@vecData.*]] = external addrspace(3) global [4 x i32], align 4
+
+; CHECK-LABEL: subtype_array_test
+define <4 x i32> @subtype_array_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_vector_test
+define <4 x i32> @subtype_vector_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw <4 x i32>, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_scalar_test
+define <4 x i32> @subtype_scalar_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i32, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_i8_test
+define <4 x i32> @subtype_i8_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i8, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
diff --git a/llvm/test/CodeGen/Generic/reloc-none.ll b/llvm/test/CodeGen/Generic/reloc-none.ll
new file mode 100644
index 000000000000..0c8b7a57aca8
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/reloc-none.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: .reloc {{.*}}, BFD_RELOC_NONE, foo
+
+define void @test_reloc_none() {
+ call void @llvm.reloc.none(metadata !"foo")
+ ret void
+}
+
+declare void @llvm.reloc.none(metadata)
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll
new file mode 100644
index 000000000000..9625a605910c
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll
@@ -0,0 +1,372 @@
+; REQUIRES: hexagon-registered-target, silver
+; This tests correct handling of register spills and fills of
+; qf operands during register allocation.
+
+; RUN: llc -mcpu=hexagonv79 -mattr=+hvx-length128b,+hvxv79,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V79
+; RUN: llc -mcpu=hexagonv81 -mattr=+hvx-length128b,+hvxv81,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V81
+
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81: Inserting after conv: [[VREG0:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG0]]
+; V79-81-NEXT: Inserting after conv: [[VREG1:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG1]]
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81: Inserting after conv: [[VREG2:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG2]]
+; V79-81-NEXT: Inserting after conv: [[VREG3:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG3]]
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81-DAG: Inserting after conv: [[VREG4:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG4]]
+; V79-81-DAG: Inserting after conv: [[VREG5:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG5]]
+; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vadd_sf killed renamable [[VREG2]], killed renamable [[VREG0]]
+; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vsub_sf killed renamable $v{{[0-9]+}}, killed renamable $v{{[0-9]+}}
+;
+; V79-81: Analyzing convert instruction: renamable [[VREG6:\$v[0-9]+]] = V6_vconv_hf_qf32 killed renamable $w{{[0-9]+}}
+; V79: Inserting new instruction: [[VREG30:\$v[0-9]+]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG7]], killed [[VREG30]]
+; V79: Inserting new instruction: [[VREG30]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG8]], killed [[VREG30]]
+; V81: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG7]]
+; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]]
+
+; V79-81: Analyzing convert instruction: renamable [[VREG9:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable $v{{[0-9]+}}
+; V79: Inserting new instruction: [[VREG30]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG10:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG10]], killed [[VREG30]]
+; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]]
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+@.str.1 = private unnamed_addr constant [9 x i8] c"0x%08lx \00", align 1
+@.str.3 = private unnamed_addr constant [173 x i8] c"/prj/qct/llvm/devops/aether/hexbuild/test_trees/MASTER/test/regress/features/hexagon/arch_v68/hvx_ieee_fp/hvx_ieee_fp_test.c:126 0 && \22ERROR: Failed to acquire HVX unit.\\n\22\00", align 1
+@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1
+@.str.5 = private unnamed_addr constant [33 x i8] c"half -3 converted to vhf = %.2f\0A\00", align 1
+@.str.6 = private unnamed_addr constant [35 x i8] c"uhalf 32k converted to vhf = %.2f\0A\00", align 1
+@.str.7 = private unnamed_addr constant [32 x i8] c"sf 0.5 converted to vhf = %.2f\0A\00", align 1
+@.str.8 = private unnamed_addr constant [32 x i8] c"vhf 4.0 conveted to ubyte = %d\0A\00", align 1
+@.str.9 = private unnamed_addr constant [32 x i8] c"vhf 2.0 conveted to uhalf = %d\0A\00", align 1
+@.str.10 = private unnamed_addr constant [30 x i8] c"byte 4 conveted to hf = %.2f\0A\00", align 1
+@.str.11 = private unnamed_addr constant [31 x i8] c"ubyte 4 conveted to hf = %.2f\0A\00", align 1
+@.str.12 = private unnamed_addr constant [27 x i8] c"hf -3 conveted to sf = %f\0A\00", align 1
+@.str.13 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to byte = %d\0A\00", align 1
+@.str.14 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to half = %d\0A\00", align 1
+@.str.16 = private unnamed_addr constant [33 x i8] c"max of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1
+@.str.17 = private unnamed_addr constant [33 x i8] c"min of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1
+@.str.18 = private unnamed_addr constant [32 x i8] c"max of sf 0.5 and sf 0.25 = %f\0A\00", align 1
+@.str.19 = private unnamed_addr constant [32 x i8] c"min of sf 0.5 and sf 0.25 = %f\0A\00", align 1
+@.str.21 = private unnamed_addr constant [25 x i8] c"negate of hf 4.0 = %.2f\0A\00", align 1
+@.str.22 = private unnamed_addr constant [23 x i8] c"abs of hf -6.0 = %.2f\0A\00", align 1
+@.str.23 = private unnamed_addr constant [23 x i8] c"negate of sf 0.5 = %f\0A\00", align 1
+@.str.24 = private unnamed_addr constant [22 x i8] c"abs of sf -0.25 = %f\0A\00", align 1
+@.str.26 = private unnamed_addr constant [32 x i8] c"hf add of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.27 = private unnamed_addr constant [32 x i8] c"hf sub of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.28 = private unnamed_addr constant [31 x i8] c"sf add of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.29 = private unnamed_addr constant [31 x i8] c"sf sub of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.30 = private unnamed_addr constant [36 x i8] c"sf add of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.31 = private unnamed_addr constant [36 x i8] c"sf sub of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.33 = private unnamed_addr constant [32 x i8] c"hf mpy of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.34 = private unnamed_addr constant [35 x i8] c"hf accmpy of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.35 = private unnamed_addr constant [36 x i8] c"sf mpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.36 = private unnamed_addr constant [39 x i8] c"sf accmpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.37 = private unnamed_addr constant [31 x i8] c"sf mpy of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.39 = private unnamed_addr constant [25 x i8] c"w copy from sf 0.5 = %f\0A\00", align 1
+@str = private unnamed_addr constant [35 x i8] c"ERROR: Failed to acquire HVX unit.\00", align 1
+@str.40 = private unnamed_addr constant [25 x i8] c"\0AConversion intructions\0A\00", align 1
+@str.41 = private unnamed_addr constant [23 x i8] c"\0AMin/Max instructions\0A\00", align 1
+@str.42 = private unnamed_addr constant [23 x i8] c"\0Aabs/neg instructions\0A\00", align 1
+@str.43 = private unnamed_addr constant [23 x i8] c"\0Aadd/sub instructions\0A\00", align 1
+@str.44 = private unnamed_addr constant [24 x i8] c"\0Amultiply instructions\0A\00", align 1
+@str.45 = private unnamed_addr constant [19 x i8] c"\0Acopy instruction\0A\00", align 1
+
+declare dso_local void @print_vector_words(<32 x i32> noundef %x) local_unnamed_addr #0
+
+; Function Attrs: nofree nounwind optsize
+declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #0
+
+; Function Attrs: nounwind optsize
+define dso_local i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #1 {
+entry:
+ %call = tail call i32 @acquire_vector_unit(i8 noundef zeroext 0) #6
+ %tobool.not = icmp eq i32 %call, 0
+ br i1 %tobool.not, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %puts = tail call i32 @puts(ptr nonnull dereferenceable(1) @str)
+ tail call void @_Assert(ptr noundef nonnull @.str.3, ptr noundef nonnull @__func__.main) #7
+ unreachable
+
+if.end: ; preds = %entry
+ tail call void @set_double_vector_mode() #6
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 16384)
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 17408)
+ %2 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -14848)
+ %3 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608)
+ %4 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1048576000)
+ %5 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 -1098907648)
+ %6 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -3)
+ %7 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 32768)
+ %puts147 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.40)
+ %8 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32> %6)
+ %bc.i = bitcast <32 x i32> %8 to <64 x half>
+ %9 = extractelement <64 x half> %bc.i, i64 0
+ %conv = fpext half %9 to double
+ %call12 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.5, double noundef %conv) #6
+ %10 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32> %7)
+ %bc.i153 = bitcast <32 x i32> %10 to <64 x half>
+ %11 = extractelement <64 x half> %bc.i153, i64 0
+ %conv14 = fpext half %11 to double
+ %call15 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.6, double noundef %conv14) #6
+ %12 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32> %3, <32 x i32> %3)
+ %bc.i155 = bitcast <32 x i32> %12 to <64 x half>
+ %13 = extractelement <64 x half> %bc.i155, i64 0
+ %conv17 = fpext half %13 to double
+ %call18 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.7, double noundef %conv17) #6
+ %14 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32> %1, <32 x i32> %1)
+ %15 = bitcast <32 x i32> %14 to <128 x i8>
+ %conv.i = extractelement <128 x i8> %15, i64 0
+ %conv20 = zext i8 %conv.i to i32
+ %call21 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.8, i32 noundef %conv20) #6
+ %16 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32> %0)
+ %17 = bitcast <32 x i32> %16 to <64 x i16>
+ %conv.i157 = extractelement <64 x i16> %17, i64 0
+ %conv23 = sext i16 %conv.i157 to i32
+ %call24 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.9, i32 noundef %conv23) #6
+ %18 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32> %14)
+ %bc.i158 = bitcast <64 x i32> %18 to <128 x half>
+ %19 = extractelement <128 x half> %bc.i158, i64 0
+ %conv26 = fpext half %19 to double
+ %call27 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.10, double noundef %conv26) #6
+ %20 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32> %14)
+ %bc.i159 = bitcast <64 x i32> %20 to <128 x half>
+ %21 = extractelement <128 x half> %bc.i159, i64 0
+ %conv29 = fpext half %21 to double
+ %call30 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.11, double noundef %conv29) #6
+ %22 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32> %8)
+ %bc.i161 = bitcast <64 x i32> %22 to <64 x float>
+ %23 = extractelement <64 x float> %bc.i161, i64 0
+ %conv32 = fpext float %23 to double
+ %call33 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.12, double noundef %conv32) #6
+ %24 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32> %1, <32 x i32> %1)
+ %25 = bitcast <32 x i32> %24 to <128 x i8>
+ %conv.i162 = extractelement <128 x i8> %25, i64 0
+ %conv35 = zext i8 %conv.i162 to i32
+ %call36 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.13, i32 noundef %conv35) #6
+ %26 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32> %1)
+ %27 = bitcast <32 x i32> %26 to <64 x i16>
+ %conv.i163 = extractelement <64 x i16> %27, i64 0
+ %conv38 = sext i16 %conv.i163 to i32
+ %call39 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.14, i32 noundef %conv38) #6
+ %28 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32> %0, <32 x i32> %1)
+ %puts148 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.41)
+ %bc.i164 = bitcast <32 x i32> %28 to <64 x half>
+ %29 = extractelement <64 x half> %bc.i164, i64 0
+ %conv42 = fpext half %29 to double
+ %call43 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.16, double noundef %conv42) #6
+ %30 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32> %0, <32 x i32> %1)
+ %bc.i166 = bitcast <32 x i32> %30 to <64 x half>
+ %31 = extractelement <64 x half> %bc.i166, i64 0
+ %conv45 = fpext half %31 to double
+ %call46 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.17, double noundef %conv45) #6
+ %32 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32> %3, <32 x i32> %4)
+ %bc.i168 = bitcast <32 x i32> %32 to <32 x float>
+ %33 = extractelement <32 x float> %bc.i168, i64 0
+ %conv48 = fpext float %33 to double
+ %call49 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.18, double noundef %conv48) #6
+ %34 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32> %3, <32 x i32> %4)
+ %bc.i169 = bitcast <32 x i32> %34 to <32 x float>
+ %35 = extractelement <32 x float> %bc.i169, i64 0
+ %conv51 = fpext float %35 to double
+ %call52 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.19, double noundef %conv51) #6
+ %puts149 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.42)
+ %36 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32> %1)
+ %bc.i170 = bitcast <32 x i32> %36 to <64 x half>
+ %37 = extractelement <64 x half> %bc.i170, i64 0
+ %conv55 = fpext half %37 to double
+ %call56 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.21, double noundef %conv55) #6
+ %38 = tail call <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32> %2)
+ %bc.i172 = bitcast <32 x i32> %38 to <64 x half>
+ %39 = extractelement <64 x half> %bc.i172, i64 0
+ %conv58 = fpext half %39 to double
+ %call59 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.22, double noundef %conv58) #6
+ %40 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32> %3)
+ %bc.i174 = bitcast <32 x i32> %40 to <32 x float>
+ %41 = extractelement <32 x float> %bc.i174, i64 0
+ %conv61 = fpext float %41 to double
+ %call62 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.23, double noundef %conv61) #6
+ %42 = tail call <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32> %5)
+ %bc.i175 = bitcast <32 x i32> %42 to <32 x float>
+ %43 = extractelement <32 x float> %bc.i175, i64 0
+ %conv64 = fpext float %43 to double
+ %call65 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.24, double noundef %conv64) #6
+ %puts150 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.43)
+ %44 = tail call <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i176 = bitcast <32 x i32> %44 to <64 x half>
+ %45 = extractelement <64 x half> %bc.i176, i64 0
+ %conv68 = fpext half %45 to double
+ %call69 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.26, double noundef %conv68) #6
+ %46 = tail call <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i178 = bitcast <32 x i32> %46 to <64 x half>
+ %47 = extractelement <64 x half> %bc.i178, i64 0
+ %conv71 = fpext half %47 to double
+ %call72 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.27, double noundef %conv71) #6
+ %48 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i180 = bitcast <32 x i32> %48 to <32 x float>
+ %49 = extractelement <32 x float> %bc.i180, i64 0
+ %conv74 = fpext float %49 to double
+ %call75 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.28, double noundef %conv74) #6
+ %50 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i181 = bitcast <32 x i32> %50 to <32 x float>
+ %51 = extractelement <32 x float> %bc.i181, i64 0
+ %conv77 = fpext float %51 to double
+ %call78 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.29, double noundef %conv77) #6
+ %52 = tail call <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i182 = bitcast <64 x i32> %52 to <64 x float>
+ %53 = extractelement <64 x float> %bc.i182, i64 0
+ %conv80 = fpext float %53 to double
+ %call81 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.30, double noundef %conv80) #6
+ %54 = tail call <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i183 = bitcast <64 x i32> %54 to <64 x float>
+ %55 = extractelement <64 x float> %bc.i183, i64 0
+ %conv83 = fpext float %55 to double
+ %call84 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.31, double noundef %conv83) #6
+ %puts151 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.44)
+ %56 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i184 = bitcast <32 x i32> %56 to <64 x half>
+ %57 = extractelement <64 x half> %bc.i184, i64 0
+ %conv87 = fpext half %57 to double
+ %call88 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.33, double noundef %conv87) #6
+ %58 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32> %56, <32 x i32> %1, <32 x i32> %2)
+ %bc.i186 = bitcast <32 x i32> %58 to <64 x half>
+ %59 = extractelement <64 x half> %bc.i186, i64 0
+ %conv90 = fpext half %59 to double
+ %call91 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.34, double noundef %conv90) #6
+ %60 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i188 = bitcast <64 x i32> %60 to <64 x float>
+ %61 = extractelement <64 x float> %bc.i188, i64 0
+ %conv93 = fpext float %61 to double
+ %call94 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.35, double noundef %conv93) #6
+ %62 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32> %60, <32 x i32> %1, <32 x i32> %2)
+ %bc.i189 = bitcast <64 x i32> %62 to <64 x float>
+ %63 = extractelement <64 x float> %bc.i189, i64 0
+ %conv96 = fpext float %63 to double
+ %call97 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.36, double noundef %conv96) #6
+ %64 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i190 = bitcast <32 x i32> %64 to <32 x float>
+ %65 = extractelement <32 x float> %bc.i190, i64 0
+ %conv99 = fpext float %65 to double
+ %call100 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.37, double noundef %conv99) #6
+ %puts152 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.45)
+ %66 = tail call <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32> %3)
+ %bc.i191 = bitcast <32 x i32> %66 to <32 x float>
+ %67 = extractelement <32 x float> %bc.i191, i64 0
+ %conv103 = fpext float %67 to double
+ %call104 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.39, double noundef %conv103) #6
+ ret i32 0
+}
+
+; Function Attrs: optsize
+declare dso_local i32 @acquire_vector_unit(i8 noundef zeroext) local_unnamed_addr #2
+
+; Function Attrs: noreturn nounwind optsize
+declare dso_local void @_Assert(ptr noundef, ptr noundef) local_unnamed_addr #3
+
+; Function Attrs: optsize
+declare dso_local void @set_double_vector_mode(...) local_unnamed_addr #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32>, <32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32>, <32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #4
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @putchar(i32 noundef) local_unnamed_addr #5
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @puts(ptr nocapture noundef readonly) local_unnamed_addr #5
diff --git a/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
new file mode 100644
index 000000000000..cdb779f5c4e7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
@@ -0,0 +1,60 @@
+;; RUN: llc --mtriple=hexagon --mcpu=hexagonv81 --mattr=+hvxv81,+hvx-length128b %s -o - | FileCheck %s
+
+define void @mul_and_sub_1(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <32 x float>, ptr %A, align 4
+ %BVec = load <32 x float>, ptr %B, align 4
+ %CVec = load <32 x float>, ptr %C, align 4
+ %AtBVec = fmul <32 x float> %AVec, %BVec
+
+ %DVec = fsub <32 x float> %CVec, %AtBVec
+ store <32 x float> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_1
+;; CHECK: vsub(v{{[0-9]+}}.sf,v{{[0-9]+}}.qf32)
+
+
+define void @mul_and_sub_2(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <32 x float>, ptr %A, align 4
+ %BVec = load <32 x float>, ptr %B, align 4
+ %CVec = load <32 x float>, ptr %C, align 4
+ %AtBVec = fmul <32 x float> %AVec, %BVec
+
+ %DVec = fsub <32 x float> %AtBVec, %CVec
+ store <32 x float> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_2
+;; CHECK: vsub(v{{[0-9]+}}.qf32,v{{[0-9]+}}.sf)
+
+
+define void @mul_and_sub_3(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <64 x half>, ptr %A, align 4
+ %BVec = load <64 x half>, ptr %B, align 4
+ %CVec = load <64 x half>, ptr %C, align 4
+ %AtBVec = fmul <64 x half> %AVec, %BVec
+
+ %DVec = fsub <64 x half> %CVec, %AtBVec
+ store <64 x half> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_3
+;; CHECK: vsub(v{{[0-9]+}}.hf,v{{[0-9]+}}.qf16)
+
+
+define void @mul_and_sub_4(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <64 x half>, ptr %A, align 4
+ %BVec = load <64 x half>, ptr %B, align 4
+ %CVec = load <64 x half>, ptr %C, align 4
+ %AtBVec = fmul <64 x half> %AVec, %BVec
+
+ %DVec = fsub <64 x half> %AtBVec, %CVec
+ store <64 x half> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_4
+;; CHECK: vsub(v{{[0-9]+}}.qf16,v{{[0-9]+}}.hf)
diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
index c16370c3b907..527f27e56c33 100644
--- a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
+++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
@@ -2,7 +2,7 @@
; type as first parameter instead of a sf type without
; any conversion instruction of type sf = qf32
-; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s
+; RUN: llc -mtriple=hexagon -mattr=+hvx-length128b,+hvxv75,+v75 < %s -o - | FileCheck %s
; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]])
; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf)
@@ -17,5 +17,3 @@ entry:
store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2
ret void
}
-
-attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" }
diff --git a/llvm/test/CodeGen/Hexagon/vect-qfp.mir b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
new file mode 100644
index 000000000000..6909591ffddf
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
@@ -0,0 +1,202 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer -disable-qfp-opt-mul=false %s -o - | FileCheck %s --check-prefix=MUL-ENABLED
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s --check-prefix=DEFAULT
+# MUL-ENABLED-LABEL: name: qfpAdd32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vadd_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf32
+# DEFAULT-LABEL: name: qfpAdd32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vadd_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf32
+---
+name: qfpAdd32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vadd_sf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vadd_sf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vadd_sf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpAdd16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vadd_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf16
+# DEFAULT-LABEL: name: qfpAdd16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vadd_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf16
+---
+name: qfpAdd16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vadd_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vadd_hf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vadd_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpSub32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vsub_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf32
+# DEFAULT-LABEL: name: qfpSub32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vsub_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf32
+---
+name: qfpSub32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vsub_sf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vsub_sf %7:hvxvr, %5:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vsub_sf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpSub16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vsub_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf16
+# DEFAULT-LABEL: name: qfpSub16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vsub_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf16
+---
+name: qfpSub16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vsub_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vsub_hf %7:hvxvr, %5:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vsub_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpMul32
+# MUL-ENABLED: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# DEFAULT-LABEL: name: qfpMul32
+# DEFAULT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vS32Ub_ai
+---
+name: qfpMul32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %7:hvxvr = V6_vmpy_qf32_sf %4:hvxvr, %5:hvxvr
+ %8:hvxvr = V6_vconv_sf_qf32 %7:hvxvr
+ %9:hvxvr = V6_vmpy_qf32_sf %5:hvxvr, %6:hvxvr
+ %10:hvxvr = V6_vconv_sf_qf32 %9:hvxvr
+ %11:hvxvr = V6_vmpy_qf32_sf %8:hvxvr, %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpMul16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vmpy_qf16_mix_hf
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vmpy_qf16
+# DEFAULT-LABEL: name: qfpMul16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+---
+name: qfpMul16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vmpy_qf16_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vmpy_qf16_hf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vmpy_qf16_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
new file mode 100644
index 000000000000..482edc8dc242
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
@@ -0,0 +1,97 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+
+# CHECK: name: qfp_vilog32
+# CHECK: V6_vilog2_qf32
+---
+name: qfp_vilog32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vilog2_sf $v1
+ V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK-LABEL: name: qfp_vilog16
+# CHECK: V6_vilog2_qf16
+---
+name: qfp_vilog16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vilog2_hf $v1
+ V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK: name: qfp_vneg32
+# CHECK: V6_vneg_qf32_qf32
+---
+name: qfp_vneg32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vneg_qf32_sf $v1
+ $v3 = V6_vconv_sf_qf32 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vneg16
+# CHECK: V6_vneg_qf16_qf16
+---
+name: qfp_vneg16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vneg_qf16_hf $v1
+ $v3 = V6_vconv_hf_qf16 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK: name: qfp_vabs32
+# CHECK: V6_vabs_qf32_qf32
+---
+name: qfp_vabs32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vabs_qf32_sf $v1
+ $v3 = V6_vconv_sf_qf32 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vabs16
+# CHECK: V6_vabs_qf16_qf16
+---
+name: qfp_vabs16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vabs_qf16_hf $v1
+ $v3 = V6_vconv_hf_qf16 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
index 000c67efb1de..8af4277f12c6 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -1531,6 +1531,7 @@ Key: RDSSPQ: [ 0.00 0.00 ]
Key: RDTSC: [ 0.00 0.00 ]
Key: RDTSCP: [ 0.00 0.00 ]
Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: RELOC_NONE: [ 0.00 0.00 ]
Key: REPNE_PREFIX: [ 0.00 0.00 ]
Key: REP_MOVSB: [ 0.00 0.00 ]
Key: REP_MOVSD: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
index bb72886f73bf..e13342641d35 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -1531,6 +1531,7 @@ Key: RDSSPQ: [ 0.00 0.00 ]
Key: RDTSC: [ 0.00 0.00 ]
Key: RDTSCP: [ 0.00 0.00 ]
Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: RELOC_NONE: [ 0.00 0.00 ]
Key: REPNE_PREFIX: [ 0.00 0.00 ]
Key: REP_MOVSB: [ 0.00 0.00 ]
Key: REP_MOVSD: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/PowerPC/vp-ld-st.ll b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
new file mode 100644
index 000000000000..f0f9943e901e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vp-ld-st.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=future \
+; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
+
+; Function Attrs: nounwind readnone
+define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 56
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: stxvrl 34, 5, 6
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 57
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 1
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 58
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 2
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
+; CHECK-LABEL: stxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 3, 6, 59
+; CHECK-NEXT: stxvl 34, 5, 3
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: stxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 3, 6, 3
+; FUTURE-NEXT: stxvrl 34, 5, 3
+; FUTURE-NEXT: blr
+entry:
+ %cconv = trunc i64 %c to i32
+ tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+define <16 x i8> @lxvl1(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 56
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl1:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
+ ret <16 x i8> %0
+}
+
+; Function Attrs: nounwind readnone
+define <8 x i16> @lxvl2(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 57
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl2:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 1
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
+ ret <8 x i16> %0
+}
+
+; Function Attrs: nounwind readnone
+define <4 x i32> @lxvl4(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 58
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl4:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 2
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
+ ret <4 x i32> %0
+}
+
+; Function Attrs: nounwind readnone
+define <2 x i64> @lxvl8(ptr %a, i64 %b) {
+; CHECK-LABEL: lxvl8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi 4, 4, 59
+; CHECK-NEXT: lxvl 34, 3, 4
+; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: lxvl8:
+; FUTURE: # %bb.0: # %entry
+; FUTURE-NEXT: sldi 4, 4, 3
+; FUTURE-NEXT: lxvrl 34, 3, 4
+; FUTURE-NEXT: blr
+entry:
+ %bconv = trunc i64 %b to i32
+ %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
+ ret <2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
index 2acb1d43e01e..78d242b5a28b 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
@@ -3,27 +3,33 @@
# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
# RUN: | FileCheck -check-prefixes=OUTLINED,RV64I-MO %s
-# CFIs are invisible (they can be outlined, but won't actually impact the outlining result) if there
-# is no need to unwind. CFIs will be stripped when we build outlined functions.
+# Combined tests for outlining with CFI instructions on RISC-V:
+# 1) All CFIs present in candidate: outline as tail-call and keep CFIs.
+# 2) Partial CFIs in function (extra outside candidate): do not outline.
+# 3) CFIs present but candidate is not a tail-call: do not outline.
--- |
- define void @func1(i32 %a, i32 %b) nounwind { ret void }
-
- define void @func2(i32 %a, i32 %b) nounwind { ret void }
-
- define void @func3(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcA(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcB(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcC(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcD(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcE(i32 %a, i32 %b) nounwind { ret void }
+ define void @funcF(i32 %a, i32 %b) nounwind { ret void }
...
+
+# Case 1: All CFIs present; expect outlining and CFIs retained in outlined body.
---
-name: func1
+name: funcA
tracksRegLiveness: true
body: |
bb.0:
liveins: $x10, $x11
- ; RV32I-MO-LABEL: name: func1
+ ; RV32I-MO-LABEL: name: funcA
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
- ; RV64I-MO-LABEL: name: func1
+ ;
+ ; RV64I-MO-LABEL: name: funcA
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
@@ -39,62 +45,213 @@ body: |
PseudoRET
...
---
-name: func2
+name: funcB
tracksRegLiveness: true
body: |
bb.0:
liveins: $x10, $x11
- ; RV32I-MO-LABEL: name: func2
+ ; RV32I-MO-LABEL: name: funcB
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
- ; RV64I-MO-LABEL: name: func2
+ ;
+ ; RV64I-MO-LABEL: name: funcB
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
- CFI_INSTRUCTION offset $x1, -8
- $x12 = ADDI $x10, 17
CFI_INSTRUCTION offset $x1, -4
+ $x12 = ADDI $x10, 17
+ CFI_INSTRUCTION offset $x1, -8
$x11 = AND $x12, $x11
CFI_INSTRUCTION offset $x1, -12
$x10 = SUB $x10, $x11
PseudoRET
...
+
+# Case 2: Partial CFIs (extra CFI outside candidate in funcD); expect no outlining.
---
-name: func3
+name: funcC
tracksRegLiveness: true
body: |
bb.0:
liveins: $x10, $x11
- ; RV32I-MO-LABEL: name: func3
+ ; RV32I-MO-LABEL: name: funcC
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
- ; RV64I-MO-LABEL: name: func3
+ ;
+ ; RV64I-MO-LABEL: name: funcC
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
- CFI_INSTRUCTION offset $x1, -12
+ CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
+ CFI_INSTRUCTION offset $x1, -4
+ $x12 = ADDI $x10, 17
CFI_INSTRUCTION offset $x1, -8
+ $x11 = AND $x12, $x11
+ CFI_INSTRUCTION offset $x1, -12
+ $x10 = SUB $x10, $x11
+ PseudoRET
+...
+---
+name: funcD
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; RV32I-MO-LABEL: name: funcD
+ ; RV32I-MO: liveins: $x10, $x11
+ ; RV32I-MO-NEXT: {{ $}}
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -16
+ ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV32I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV32I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV32I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV32I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV32I-MO-NEXT: PseudoRET
+ ;
+ ; RV64I-MO-LABEL: name: funcD
+ ; RV64I-MO: liveins: $x10, $x11
+ ; RV64I-MO-NEXT: {{ $}}
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -16
+ ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV64I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV64I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV64I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV64I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV64I-MO-NEXT: PseudoRET
+ CFI_INSTRUCTION offset $x1, -16
+ $x10 = ORI $x10, 1023
+ CFI_INSTRUCTION offset $x1, 0
+ $x11 = ORI $x11, 1023
+ CFI_INSTRUCTION offset $x1, -4
$x12 = ADDI $x10, 17
+ CFI_INSTRUCTION offset $x1, -8
+ $x11 = AND $x12, $x11
+ CFI_INSTRUCTION offset $x1, -12
+ $x10 = SUB $x10, $x11
+ PseudoRET
+...
+
+# Case 3: CFIs present but candidate is not a tail-call; expect no outlining.
+---
+name: funcE
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; RV32I-MO-LABEL: name: funcE
+ ; RV32I-MO: liveins: $x10, $x11
+ ; RV32I-MO-NEXT: {{ $}}
+ ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV32I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV32I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV32I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV32I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV32I-MO-NEXT: $x10 = ADDI $x10, 1
+ ; RV32I-MO-NEXT: PseudoRET
+ ;
+ ; RV64I-MO-LABEL: name: funcE
+ ; RV64I-MO: liveins: $x10, $x11
+ ; RV64I-MO-NEXT: {{ $}}
+ ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV64I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV64I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV64I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV64I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV64I-MO-NEXT: $x10 = ADDI $x10, 1
+ ; RV64I-MO-NEXT: PseudoRET
+ $x10 = ORI $x10, 1023
+ CFI_INSTRUCTION offset $x1, 0
+ $x11 = ORI $x11, 1023
CFI_INSTRUCTION offset $x1, -4
+ $x12 = ADDI $x10, 17
+ CFI_INSTRUCTION offset $x1, -8
$x11 = AND $x12, $x11
+ CFI_INSTRUCTION offset $x1, -12
+ $x10 = SUB $x10, $x11
+ $x10 = ADDI $x10, 1
+ PseudoRET
+...
+---
+name: funcF
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; RV32I-MO-LABEL: name: funcF
+ ; RV32I-MO: liveins: $x10, $x11
+ ; RV32I-MO-NEXT: {{ $}}
+ ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV32I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV32I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV32I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV32I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV32I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV32I-MO-NEXT: $x10 = ADDI $x10, 2
+ ; RV32I-MO-NEXT: PseudoRET
+ ;
+ ; RV64I-MO-LABEL: name: funcF
+ ; RV64I-MO: liveins: $x10, $x11
+ ; RV64I-MO-NEXT: {{ $}}
+ ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, 0
+ ; RV64I-MO-NEXT: $x11 = ORI $x11, 1023
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -4
+ ; RV64I-MO-NEXT: $x12 = ADDI $x10, 17
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -8
+ ; RV64I-MO-NEXT: $x11 = AND $x12, $x11
+ ; RV64I-MO-NEXT: CFI_INSTRUCTION offset $x1, -12
+ ; RV64I-MO-NEXT: $x10 = SUB $x10, $x11
+ ; RV64I-MO-NEXT: $x10 = ADDI $x10, 2
+ ; RV64I-MO-NEXT: PseudoRET
+ $x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
+ $x11 = ORI $x11, 1023
+ CFI_INSTRUCTION offset $x1, -4
+ $x12 = ADDI $x10, 17
+ CFI_INSTRUCTION offset $x1, -8
+ $x11 = AND $x12, $x11
+ CFI_INSTRUCTION offset $x1, -12
$x10 = SUB $x10, $x11
+ $x10 = ADDI $x10, 2
PseudoRET
-
+...
# OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
# OUTLINED: liveins: $x11, $x10
# OUTLINED-NEXT: {{ $}}
# OUTLINED-NEXT: $x10 = ORI $x10, 1023
+# OUTLINED-NEXT: CFI_INSTRUCTION offset $x1, 0
# OUTLINED-NEXT: $x11 = ORI $x11, 1023
+# OUTLINED-NEXT: CFI_INSTRUCTION offset $x1, -4
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
+# OUTLINED-NEXT: CFI_INSTRUCTION offset $x1, -8
# OUTLINED-NEXT: $x11 = AND $x12, $x11
+# OUTLINED-NEXT: CFI_INSTRUCTION offset $x1, -12
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
# OUTLINED-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll
index 92ae85f560cd..8490dd0877d3 100644
--- a/llvm/test/CodeGen/RISCV/remat.ll
+++ b/llvm/test/CodeGen/RISCV/remat.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O1 -mtriple=riscv32 -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -O1 -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s
@a = common global i32 0, align 4
@l = common global i32 0, align 4
@@ -21,113 +20,113 @@
; situation.
define i32 @test() nounwind {
-; RV32I-LABEL: test:
-; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -64
-; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui s0, %hi(a)
-; RV32I-NEXT: lw a0, %lo(a)(s0)
-; RV32I-NEXT: beqz a0, .LBB0_11
-; RV32I-NEXT: # %bb.1: # %for.body.preheader
-; RV32I-NEXT: lui s1, %hi(l)
-; RV32I-NEXT: lui s2, %hi(k)
-; RV32I-NEXT: lui s3, %hi(j)
-; RV32I-NEXT: lui s4, %hi(i)
-; RV32I-NEXT: lui s5, %hi(d)
-; RV32I-NEXT: lui s6, %hi(e)
-; RV32I-NEXT: lui s7, %hi(f)
-; RV32I-NEXT: lui s8, %hi(g)
-; RV32I-NEXT: lui s9, %hi(h)
-; RV32I-NEXT: lui s10, %hi(c)
-; RV32I-NEXT: lui s11, %hi(b)
-; RV32I-NEXT: j .LBB0_3
-; RV32I-NEXT: .LBB0_2: # %for.inc
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(a)(s0)
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: sw a0, %lo(a)(s0)
-; RV32I-NEXT: beqz a0, .LBB0_11
-; RV32I-NEXT: .LBB0_3: # %for.body
-; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: lw a1, %lo(l)(s1)
-; RV32I-NEXT: beqz a1, .LBB0_5
-; RV32I-NEXT: # %bb.4: # %if.then
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a1, %lo(b)(s11)
-; RV32I-NEXT: lw a2, %lo(c)(s10)
-; RV32I-NEXT: lw a3, %lo(d)(s5)
-; RV32I-NEXT: lw a4, %lo(e)(s6)
-; RV32I-NEXT: li a5, 32
-; RV32I-NEXT: call foo
-; RV32I-NEXT: .LBB0_5: # %if.end
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(k)(s2)
-; RV32I-NEXT: beqz a0, .LBB0_7
-; RV32I-NEXT: # %bb.6: # %if.then3
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(b)(s11)
-; RV32I-NEXT: lw a1, %lo(c)(s10)
-; RV32I-NEXT: lw a2, %lo(d)(s5)
-; RV32I-NEXT: lw a3, %lo(e)(s6)
-; RV32I-NEXT: lw a4, %lo(f)(s7)
-; RV32I-NEXT: li a5, 64
-; RV32I-NEXT: call foo
-; RV32I-NEXT: .LBB0_7: # %if.end5
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(j)(s3)
-; RV32I-NEXT: beqz a0, .LBB0_9
-; RV32I-NEXT: # %bb.8: # %if.then7
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(c)(s10)
-; RV32I-NEXT: lw a1, %lo(d)(s5)
-; RV32I-NEXT: lw a2, %lo(e)(s6)
-; RV32I-NEXT: lw a3, %lo(f)(s7)
-; RV32I-NEXT: lw a4, %lo(g)(s8)
-; RV32I-NEXT: li a5, 32
-; RV32I-NEXT: call foo
-; RV32I-NEXT: .LBB0_9: # %if.end9
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(i)(s4)
-; RV32I-NEXT: beqz a0, .LBB0_2
-; RV32I-NEXT: # %bb.10: # %if.then11
-; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT: lw a0, %lo(d)(s5)
-; RV32I-NEXT: lw a1, %lo(e)(s6)
-; RV32I-NEXT: lw a2, %lo(f)(s7)
-; RV32I-NEXT: lw a3, %lo(g)(s8)
-; RV32I-NEXT: lw a4, %lo(h)(s9)
-; RV32I-NEXT: li a5, 32
-; RV32I-NEXT: call foo
-; RV32I-NEXT: j .LBB0_2
-; RV32I-NEXT: .LBB0_11: # %for.end
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 64
-; RV32I-NEXT: ret
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -112
+; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: lui s0, %hi(a)
+; CHECK-NEXT: lw a0, %lo(a)(s0)
+; CHECK-NEXT: beqz a0, .LBB0_11
+; CHECK-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-NEXT: lui s1, %hi(l)
+; CHECK-NEXT: lui s2, %hi(k)
+; CHECK-NEXT: lui s3, %hi(j)
+; CHECK-NEXT: lui s4, %hi(i)
+; CHECK-NEXT: lui s5, %hi(d)
+; CHECK-NEXT: lui s6, %hi(e)
+; CHECK-NEXT: lui s7, %hi(f)
+; CHECK-NEXT: lui s8, %hi(g)
+; CHECK-NEXT: lui s9, %hi(h)
+; CHECK-NEXT: lui s10, %hi(c)
+; CHECK-NEXT: lui s11, %hi(b)
+; CHECK-NEXT: j .LBB0_3
+; CHECK-NEXT: .LBB0_2: # %for.inc
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a0, %lo(a)(s0)
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: sw a0, %lo(a)(s0)
+; CHECK-NEXT: beqz a0, .LBB0_11
+; CHECK-NEXT: .LBB0_3: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: lw a1, %lo(l)(s1)
+; CHECK-NEXT: beqz a1, .LBB0_5
+; CHECK-NEXT: # %bb.4: # %if.then
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a4, %lo(e)(s6)
+; CHECK-NEXT: lw a3, %lo(d)(s5)
+; CHECK-NEXT: lw a2, %lo(c)(s10)
+; CHECK-NEXT: lw a1, %lo(b)(s11)
+; CHECK-NEXT: li a5, 32
+; CHECK-NEXT: call foo
+; CHECK-NEXT: .LBB0_5: # %if.end
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a0, %lo(k)(s2)
+; CHECK-NEXT: beqz a0, .LBB0_7
+; CHECK-NEXT: # %bb.6: # %if.then3
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a4, %lo(f)(s7)
+; CHECK-NEXT: lw a3, %lo(e)(s6)
+; CHECK-NEXT: lw a2, %lo(d)(s5)
+; CHECK-NEXT: lw a1, %lo(c)(s10)
+; CHECK-NEXT: lw a0, %lo(b)(s11)
+; CHECK-NEXT: li a5, 64
+; CHECK-NEXT: call foo
+; CHECK-NEXT: .LBB0_7: # %if.end5
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a0, %lo(j)(s3)
+; CHECK-NEXT: beqz a0, .LBB0_9
+; CHECK-NEXT: # %bb.8: # %if.then7
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a4, %lo(g)(s8)
+; CHECK-NEXT: lw a3, %lo(f)(s7)
+; CHECK-NEXT: lw a2, %lo(e)(s6)
+; CHECK-NEXT: lw a1, %lo(d)(s5)
+; CHECK-NEXT: lw a0, %lo(c)(s10)
+; CHECK-NEXT: li a5, 32
+; CHECK-NEXT: call foo
+; CHECK-NEXT: .LBB0_9: # %if.end9
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a0, %lo(i)(s4)
+; CHECK-NEXT: beqz a0, .LBB0_2
+; CHECK-NEXT: # %bb.10: # %if.then11
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: lw a4, %lo(h)(s9)
+; CHECK-NEXT: lw a3, %lo(g)(s8)
+; CHECK-NEXT: lw a2, %lo(f)(s7)
+; CHECK-NEXT: lw a1, %lo(e)(s6)
+; CHECK-NEXT: lw a0, %lo(d)(s5)
+; CHECK-NEXT: li a5, 32
+; CHECK-NEXT: call foo
+; CHECK-NEXT: j .LBB0_2
+; CHECK-NEXT: .LBB0_11: # %for.end
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 112
+; CHECK-NEXT: ret
entry:
%.pr = load i32, ptr @a, align 4
%tobool14 = icmp eq i32 %.pr, 0
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index d4b228828c04..e56c7b41d43c 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1459,6 +1459,34 @@ define i64 @mul288(i64 %a) {
ret i64 %c
}
+define i64 @zext_mul44(i32 signext %a) {
+; RV64I-LABEL: zext_mul44:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 11
+; RV64I-NEXT: slli a1, a1, 34
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: mulhu a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: zext_mul44:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli.uw a0, a0, 2
+; RV64ZBA-NEXT: sh2add a1, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: zext_mul44:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 32
+; RV64XANDESPERF-NEXT: srli a0, a0, 30
+; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
+ %b = zext i32 %a to i64
+ %c = mul i64 %b, 44
+ ret i64 %c
+}
+
define i64 @zext_mul68(i32 signext %a) {
; RV64I-LABEL: zext_mul68:
; RV64I: # %bb.0:
@@ -1511,6 +1539,34 @@ define i64 @zext_mul96(i32 signext %a) {
ret i64 %c
}
+define i64 @zext_mul100(i32 signext %a) {
+; RV64I-LABEL: zext_mul100:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: slli a1, a1, 34
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: mulhu a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: zext_mul100:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli.uw a0, a0, 2
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: zext_mul100:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 32
+; RV64XANDESPERF-NEXT: srli a0, a0, 30
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: ret
+ %b = zext i32 %a to i64
+ %c = mul i64 %b, 100
+ ret i64 %c
+}
+
define i64 @zext_mul160(i32 signext %a) {
; RV64I-LABEL: zext_mul160:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index b1f0eee3e9f5..034186210513 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -595,12 +595,11 @@ define <vscale x 4 x i32> @mismatched_extend_sub_add_commuted(<vscale x 4 x i16>
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; FOLDING-NEXT: vzext.vf2 v10, v8
-; FOLDING-NEXT: vsext.vf2 v12, v9
; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; FOLDING-NEXT: vwsub.wv v10, v10, v9
-; FOLDING-NEXT: vwaddu.wv v12, v12, v8
+; FOLDING-NEXT: vwsub.wv v12, v10, v9
+; FOLDING-NEXT: vwadd.wv v10, v10, v9
; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; FOLDING-NEXT: vmul.vv v8, v10, v12
+; FOLDING-NEXT: vmul.vv v8, v12, v10
; FOLDING-NEXT: ret
%a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
%b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index bf6802deeffd..93b68b0a95b4 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1834,13 +1834,12 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32ZICOND-NEXT: mul a5, a3, a0
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhu a7, a0, a2
-; RV32ZICOND-NEXT: snez t0, a3
+; RV32ZICOND-NEXT: add a5, a6, a5
+; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: mulhu a3, a3, a0
-; RV32ZICOND-NEXT: mul t1, a0, a2
+; RV32ZICOND-NEXT: mul t0, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: and a1, a1, t0
+; RV32ZICOND-NEXT: czero.eqz a1, a6, a1
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: snez a2, a3
; RV32ZICOND-NEXT: add a5, a7, a5
@@ -1848,7 +1847,7 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32ZICOND-NEXT: sltu a1, a5, a7
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: sw t1, 0(a4)
+; RV32ZICOND-NEXT: sw t0, 0(a4)
; RV32ZICOND-NEXT: sw a5, 4(a4)
; RV32ZICOND-NEXT: ret
;
@@ -3690,11 +3689,10 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: snez a5, a1
-; RV32ZICOND-NEXT: and a5, a5, a6
-; RV32ZICOND-NEXT: mulhu a6, a1, a2
-; RV32ZICOND-NEXT: snez a6, a6
-; RV32ZICOND-NEXT: or a5, a5, a6
+; RV32ZICOND-NEXT: mulhu a5, a1, a2
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a1
+; RV32ZICOND-NEXT: snez a5, a5
+; RV32ZICOND-NEXT: or a5, a6, a5
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: sltu a4, a4, a6
@@ -3783,18 +3781,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
@@ -5156,18 +5153,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: beqz a0, .LBB64_2
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index 305ab934e44a..c6d72981eff3 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -7,22 +7,132 @@ define i32 @icmp_and(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; Make sure we choose the replace the single use icmp
+define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: czero.eqz a1, a0, a2
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: czero.eqz a1, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %4 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Make sure we choose the replace the single use icmp
+define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: add a0, a0, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Both icmp's have multiple uses, don't optimize
+define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: and a2, a0, a1
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a2, a0
; RV32ZICOND-NEXT: ret
;
-; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: and a2, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a2, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = zext i1 %4 to i32
+ %9 = add i32 %6, %7
+ %10 = add i32 %9, %8
+ ret i32 %10
+}
+
+
+; (and (icmp x. 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
+; RV32ZICOND-LABEL: icmp_and_select:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: sgtz a5, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT: czero.nez a2, a2, a3
+; RV32ZICOND-NEXT: or a2, a2, a5
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_select:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: sgtz a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp sgt i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = select i1 %5, i32 %z, i32 0
ret i32 %6
}
@@ -32,21 +142,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: or a4, a4, a5
-; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a1, a0
-; RV32ZICOND-NEXT: snez a1, a4
-; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_and:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a1, a0
-; RV64ZICOND-NEXT: snez a1, a2
-; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
; RV64ZICOND-NEXT: ret
%4 = icmp ne i64 %y, 0
%5 = icmp ne i64 %x, 0
diff --git a/llvm/test/CodeGen/SPIRV/ComparePointers.ll b/llvm/test/CodeGen/SPIRV/ComparePointers.ll
index 408b95579502..bc1514e145cb 100644
--- a/llvm/test/CodeGen/SPIRV/ComparePointers.ll
+++ b/llvm/test/CodeGen/SPIRV/ComparePointers.ll
@@ -12,7 +12,7 @@
;; return;
;; }
-; CHECK-SPIRV: OpConvertPtrToU
+; CHECK-SPIRV: OpSpecConstantOp %[[#]] ConvertPtrToU
; CHECK-SPIRV: OpConvertPtrToU
; CHECK-SPIRV: OpINotEqual
; CHECK-SPIRV: OpConvertPtrToU
diff --git a/llvm/test/CodeGen/SPIRV/complex-constexpr.ll b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll
index e2c1d00ba4c0..a97a124ad2c6 100644
--- a/llvm/test/CodeGen/SPIRV/complex-constexpr.ll
+++ b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll
@@ -6,7 +6,7 @@
define linkonce_odr hidden spir_func void @test() {
entry:
; CHECK: %[[#MinusOne:]] = OpConstant %[[#]] 18446744073709551615
-; CHECK: %[[#Ptr:]] = OpConvertUToPtr %[[#]] %[[#MinusOne]]
+; CHECK: %[[#Ptr:]] = OpSpecConstantOp %[[#]] ConvertUToPtr %[[#MinusOne]]
; CHECK: %[[#PtrCast:]] = OpPtrCastToGeneric %[[#]] %[[#]]
; CHECK: %[[#]] = OpFunctionCall %[[#]] %[[#]] %[[#PtrCast]] %[[#Ptr]]
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/ConvertPtrInGlobalInit.ll b/llvm/test/CodeGen/SPIRV/transcoding/ConvertPtrInGlobalInit.ll
new file mode 100644
index 000000000000..f397030c7bdb
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/transcoding/ConvertPtrInGlobalInit.ll
@@ -0,0 +1,49 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: %[[Int8Ty:[0-9]+]] = OpTypeInt 8 0
+; CHECK: %[[Int8PtrTy:[0-9]+]] = OpTypePointer Generic %[[Int8Ty]]
+; CHECK-DAG: %[[GlobInt8PtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[Int8Ty]]
+; CHECK: %[[GlobInt8PtrPtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[GlobInt8PtrTy]]
+; CHECK: %[[Int8PtrGlobPtrPtrTy:[0-9]+]] = OpTypePointer Generic %[[GlobInt8PtrPtrTy]]
+; CHECK: %[[Int32Ty:[0-9]+]] = OpTypeInt 32 0
+; CHECK: %[[Const5:[0-9]+]] = OpConstant %[[Int32Ty]] 5
+; CHECK: %[[ArrTy:[0-9]+]] = OpTypeArray %[[GlobInt8PtrTy]] %[[Const5]]
+; CHECK: %[[VtblTy:[0-9]+]] = OpTypeStruct %[[ArrTy]] %[[ArrTy]] %[[ArrTy]] %[[ArrTy]] %[[ArrTy]]
+; CHECK: %[[Int64Ty:[0-9]+]] = OpTypeInt 64 0
+; CHECK: %[[GlobVtblPtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[VtblTy]]
+; CHECK: %[[ConstMinus184:[0-9]+]] = OpConstant %[[Int64Ty]] 18446744073709551432
+; CHECK: %[[ConstMinus16:[0-9]+]] = OpConstant %[[Int64Ty]] 18446744073709551600
+; CHECK: %[[Const168:[0-9]+]] = OpConstant %[[Int64Ty]] 168
+; CHECK: %[[Nullptr:[0-9]+]] = OpConstantNull %[[GlobInt8PtrTy]]
+; CHECK: %[[Const184:[0-9]+]] = OpConstant %[[Int64Ty]] 184
+; CHECK: %[[Const184toPtr:[0-9]+]] = OpSpecConstantOp %[[GlobInt8PtrTy]] ConvertUToPtr %[[Const184]]
+; CHECK: %[[Const168toPtr:[0-9]+]] = OpSpecConstantOp %[[GlobInt8PtrTy]] ConvertUToPtr %[[Const168]]
+; CHECK: %[[ConstMinus16toPtr:[0-9]+]] = OpSpecConstantOp %[[GlobInt8PtrTy]] ConvertUToPtr %[[ConstMinus16]]
+; CHECK: %[[ConstMinus184toPtr:[0-9]+]] = OpSpecConstantOp %[[GlobInt8PtrTy]] ConvertUToPtr %[[ConstMinus184]]
+; CHECK: %[[Vtbl012:[0-9]+]] = OpConstantComposite %[[ArrTy]] %[[Const184toPtr]] %[[Nullptr]] %[[Nullptr]] %[[Nullptr]] %[[Nullptr]]
+; CHECK: %[[Vtbl3:[0-9]+]] = OpConstantComposite %[[ArrTy]] %[[Const168toPtr]] %[[ConstMinus16toPtr]] %[[Nullptr]] %[[Nullptr]] %[[Nullptr]]
+; CHECK: %[[Vtbl4:[0-9]+]] = OpConstantComposite %[[ArrTy]] %[[ConstMinus184toPtr]] %[[ConstMinus184toPtr]] %[[Nullptr]] %[[Nullptr]] %[[Nullptr]]
+; CHECK: %[[Vtbl:[0-9]+]] = OpConstantComposite %[[VtblTy]] %[[Vtbl012]] %[[Vtbl012]] %[[Vtbl012]] %[[Vtbl3]] %[[Vtbl4]]
+; CHECK: %[[#]] = OpVariable %[[GlobVtblPtrTy]] CrossWorkgroup %[[Vtbl]]
+
+@vtable = linkonce_odr unnamed_addr addrspace(1) constant { [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }
+ { [5 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 184 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null],
+ [5 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 184 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null],
+ [5 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 184 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null],
+ [5 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 168 to ptr addrspace(1)), ptr addrspace(1) inttoptr (i64 -16 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null],
+ [5 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 -184 to ptr addrspace(1)), ptr addrspace(1) inttoptr (i64 -184 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null] }
+
+define linkonce_odr spir_func void @foo(ptr addrspace(4) %this) {
+entry:
+ %0 = getelementptr inbounds i8, ptr addrspace(4) %this, i64 184
+ store ptr addrspace(1) getelementptr inbounds inrange(-24, 16) ({ [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }, ptr addrspace(1) @vtable, i32 0, i32 0, i32 3), ptr addrspace(4) %this
+ store ptr addrspace(1) getelementptr inbounds inrange(-24, 16) ({ [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }, ptr addrspace(1) @vtable, i32 0, i32 1, i32 3), ptr addrspace(4) %this
+ store ptr addrspace(1) getelementptr inbounds inrange(-24, 16) ({ [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }, ptr addrspace(1) @vtable, i32 0, i32 2, i32 3), ptr addrspace(4) %this
+ %add.ptr = getelementptr inbounds i8, ptr addrspace(4) %this, i64 184
+ store ptr addrspace(1) getelementptr inbounds inrange(-24, 16) ({ [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }, ptr addrspace(1) @vtable, i32 0, i32 4, i32 3), ptr addrspace(4) %add.ptr
+ %add.ptr2 = getelementptr inbounds i8, ptr addrspace(4) %this, i64 16
+ store ptr addrspace(1) getelementptr inbounds inrange(-24, 16) ({ [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)], [5 x ptr addrspace(1)] }, ptr addrspace(1) @vtable, i32 0, i32 3, i32 3), ptr addrspace(4) %add.ptr2
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-load-element.ll b/llvm/test/CodeGen/SystemZ/vec-load-element.ll
index 2baaed19546d..9bef279d7c0f 100644
--- a/llvm/test/CodeGen/SystemZ/vec-load-element.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-load-element.ll
@@ -5,8 +5,8 @@
; CHECK-LABEL: .LBB0_1:
; CHECK-NOT: l %r
; CHECK-NOT: vlvgf
-; CHECK: pfd
-; CHECK: vlef
+; CHECK-DAG: pfd
+; CHECK-DAG: vlef
%type0 = type { i32, [400 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@Mem = external global [150 x %type0], align 4
diff --git a/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll
new file mode 100644
index 000000000000..841c9a6d62d9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/reloc-none.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
+
+define void @test_reloc_none() {
+; CHECK-LABEL: test_reloc_none:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lreloc_none0:
+; CHECK-NEXT: .reloc .Lreloc_none0, BFD_RELOC_NONE, foo
+; CHECK-NEXT: retq
+ call void @llvm.reloc.none(metadata !"foo")
+ ret void
+}
+
+declare void @llvm.reloc.none(metadata)
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
index 79849a7153c9..d9b463504225 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
@@ -94,8 +94,8 @@ define <32 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_512(<32 x bfloat> %src,
;
; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_512:
; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
; X86-NEXT: vsubbf16 (%eax), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x08]
; X86-NEXT: vsubbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
index 0f2c75b15d5b..01b7618753a2 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -147,8 +147,8 @@ define <16 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_256(<16 x bfloat> %src,
;
; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
; X86-NEXT: vsubbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08]
; X86-NEXT: vsubbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
@@ -201,8 +201,8 @@ define <8 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_128(<8 x bfloat> %src, <8
;
; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_128:
; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
; X86-NEXT: vsubbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08]
; X86-NEXT: vsubbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 32d225273a6e..9d31c298bfb9 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1056,26 +1056,45 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
-; X64-LABEL: chain_reset_i256:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
-; X64-NEXT: movl $-2, %eax
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: shrl $3, %ecx
-; X64-NEXT: andl $28, %ecx
-; X64-NEXT: andl %eax, (%rdi,%rcx)
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq 8(%rdi), %r8
-; X64-NEXT: orq 24(%rdi), %r8
-; X64-NEXT: movq 16(%rdi), %rdi
-; X64-NEXT: orq %rcx, %rdi
-; X64-NEXT: movl (%rsi), %eax
-; X64-NEXT: movl %ecx, (%rsi)
-; X64-NEXT: movl (%rdx), %ecx
-; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: orq %r8, %rdi
-; X64-NEXT: cmovnel %ecx, %eax
-; X64-NEXT: retq
+; SSE-LABEL: chain_reset_i256:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; SSE-NEXT: movl $-2, %eax
+; SSE-NEXT: roll %cl, %eax
+; SSE-NEXT: shrl $3, %ecx
+; SSE-NEXT: andl $28, %ecx
+; SSE-NEXT: andl %eax, (%rdi,%rcx)
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq 8(%rdi), %r8
+; SSE-NEXT: orq 24(%rdi), %r8
+; SSE-NEXT: movq 16(%rdi), %rdi
+; SSE-NEXT: orq %rcx, %rdi
+; SSE-NEXT: movl (%rsi), %eax
+; SSE-NEXT: movl %ecx, (%rsi)
+; SSE-NEXT: movl (%rdx), %ecx
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: orq %r8, %rdi
+; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: chain_reset_i256:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $ecx killed $ecx def $rcx
+; AVX-NEXT: movl $-2, %eax
+; AVX-NEXT: roll %cl, %eax
+; AVX-NEXT: shrl $3, %ecx
+; AVX-NEXT: andl $28, %ecx
+; AVX-NEXT: andl %eax, (%rdi,%rcx)
+; AVX-NEXT: vmovdqu (%rdi), %ymm0
+; AVX-NEXT: movl (%rdi), %ecx
+; AVX-NEXT: movl (%rsi), %eax
+; AVX-NEXT: movl %ecx, (%rsi)
+; AVX-NEXT: movl (%rdx), %ecx
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: cmovnel %ecx, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%rem = and i32 %position, 255
%ofs = zext nneg i32 %rem to i256
%bit = shl nuw i256 1, %ofs
diff --git a/llvm/test/CodeGen/X86/narrow-add-i64.ll b/llvm/test/CodeGen/X86/narrow-add-i64.ll
new file mode 100644
index 000000000000..a7a54fd57413
--- /dev/null
+++ b/llvm/test/CodeGen/X86/narrow-add-i64.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i64 @test_add_i64_i16_const(i16 %a) nounwind {
+; X86-LABEL: test_add_i64_i16_const:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $42, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test_add_i64_i16_const:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: addq $42, %rax
+; X64-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %sum = add nuw nsw i64 %zext_a, 42
+ ret i64 %sum
+}
+
+; TODO: First 48 bits are all zeros so we can safely truncate to 32 bit additon
+define i64 @test_add_i64_i16_zext(i16 %a, i16 %b) nounwind {
+; X86-LABEL: test_add_i64_i16_zext:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test_add_i64_i16_zext:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %zext_b = zext i16 %b to i64
+ %sum = add nuw nsw i64 %zext_a, %zext_b
+ ret i64 %sum
+}
+
+; Negative: Set the 32nd bit of a to force 64 bit addition, we do not truncate to 32 bit addition in this case
+define i64 @negative_test_add_i64_i16(i16 %a) nounwind {
+; X86-LABEL: negative_test_add_i64_i16:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $42, %eax
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: negative_test_add_i64_i16:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: movabsq $4294967338, %rax # imm = 0x10000002A
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %or_a = or i64 %zext_a, 4294967296
+ %sum = add nuw nsw i64 %or_a, 42
+ ret i64 %sum
+}
+
+; Negative: We don't truncate to 32 bit addition in case of sign extension
+define i64 @negative_test_add_i64_i16_sext(i16 %a, i16 %b) nounwind {
+; X86-LABEL: negative_test_add_i64_i16_sext:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: negative_test_add_i64_i16_sext:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movswq %di, %rcx
+; X64-NEXT: movswq %si, %rax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: retq
+ %sext_a = sext i16 %a to i64
+ %sext_b = sext i16 %b to i64
+ %sum = add nuw nsw i64 %sext_a, %sext_b
+ ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
index 84c2cc6d5ec3..7735500bd3a8 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -168,8 +168,8 @@ define void @load_2byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movb %al, (%rdx)
@@ -188,17 +188,15 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
-; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx
+; X86-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi
+; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %ebx
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
-; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %edi
+; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
@@ -215,13 +213,11 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
@@ -236,14 +232,11 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %esi
+; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%edx), %edx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
@@ -260,23 +253,19 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
;
; X86-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca:
; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax)
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movb %dl, (%eax)
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
%init = load <8 x i8>, ptr %src, align 1
%byteOff.numbits = shl nuw nsw i64 %byteOff, 3
@@ -292,8 +281,8 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
@@ -312,17 +301,15 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi
-; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
+; X86-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edi
+; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
-; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx
+; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
; X86-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
@@ -339,18 +326,16 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
-; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax)
+; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movw %dx, (%eax)
; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
;
@@ -360,14 +345,11 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %esi
+; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%edx), %edx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
@@ -386,18 +368,16 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax)
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movw %dx, (%eax)
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
%init = load <8 x i8>, ptr %src, align 1
@@ -413,8 +393,8 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
@@ -433,17 +413,15 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi
-; X86-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
+; X86-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edi
+; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
-; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx
+; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
; X86-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx
; X86-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
@@ -460,18 +438,16 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-NO-BMI2-HAVE-SHLD: # %bb.0:
; X86-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
-; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X86-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X86-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
; X86-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
-; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
-; X86-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
+; X86-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X86-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
; X86-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
; X86-NO-BMI2-HAVE-SHLD-NEXT: retl
;
@@ -481,14 +457,11 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
; X86-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx
+; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %esi
+; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%edx), %edx
; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
; X86-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
@@ -507,18 +480,16 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X86-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi
-; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X86-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
; X86-HAVE-BMI2-HAVE-SHLD-NEXT: retl
%init = load <8 x i8>, ptr %src, align 1
@@ -536,8 +507,8 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -557,8 +528,8 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -571,8 +542,8 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -591,8 +562,8 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -698,8 +669,8 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -719,8 +690,8 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -733,8 +704,8 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -753,8 +724,8 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -859,8 +830,8 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -880,8 +851,8 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -894,8 +865,8 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -914,8 +885,8 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -1020,8 +991,8 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -1041,8 +1012,8 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -1055,8 +1026,8 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -1075,8 +1046,8 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
index 18960b43ab97..3170f2c06c00 100644
--- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
+++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td
@@ -96,7 +96,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const {
// CHECK-NEXT: constexpr static uint8_t MatchTable0[] = {
-// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(211), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
+// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(100), GIMT_Encode2(212), /*)*//*default:*//*Label 5*/ GIMT_Encode4(524),
// CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(458), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
// CHECK-NEXT: /* 182 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(476), GIMT_Encode4(0),
// CHECK-NEXT: /* 190 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0),
diff --git a/llvm/test/TableGen/get-named-operand-idx.td b/llvm/test/TableGen/get-named-operand-idx.td
index 4500ad1638c1..8bb4f2f68b5f 100644
--- a/llvm/test/TableGen/get-named-operand-idx.td
+++ b/llvm/test/TableGen/get-named-operand-idx.td
@@ -95,7 +95,8 @@ def InstD : InstBase {
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0,
+// CHECK-NEXT: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2,
+// CHECK-NEXT: 0,
// CHECK-NEXT: };
// CHECK-NEXT: return InstructionIndex[Opcode];
// CHECK-NEXT: }
diff --git a/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll b/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll
new file mode 100644
index 000000000000..5a33d35aa1cf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/known-bits-lerp-pattern.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Test known bits refinements for pattern: a * (b - c) + c * d
+; where a > 0, c > 0, b > 0, d > 0, and b > c.
+; This pattern is a generalization of lerp and it appears frequently in graphics operations.
+
+define i32 @test_clamp(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i32 @test_clamp(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %cmp = icmp ugt i32 %add, 65535
+ %result = select i1 %cmp, i32 65535, i32 %add
+ ret i32 %result
+}
+
+define i1 @test_trunc_cmp(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_xor(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_xor(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[C32]], 255
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = xor i32 255, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_arbitrary_b(i8 %a, i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_arbitrary_b(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub nsw nuw i32 %b32, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+
+define i1 @test_trunc_cmp_no_a(i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_no_a(
+; CHECK-SAME: i8 [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[MUL1:%.*]] = sub nuw nsw i32 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = sub nuw i32 %b32, %c32
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %sub, %mul2
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_no_d(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_no_d(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[B32:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 [[B32]], [[C32]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[C32]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ADD]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %b32 = zext i8 %b to i32
+ %c32 = zext i8 %c to i32
+ %sub = sub nsw nuw i32 %b32, %c32
+ %mul1 = mul i32 %a32, %sub
+ %add = add i32 %mul1, %c32
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
+
+define i1 @test_trunc_cmp_xor_negative(i8 %a, i8 %c, i8 %d) {
+; CHECK-LABEL: define i1 @test_trunc_cmp_xor_negative(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) {
+; CHECK-NEXT: [[A32:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[C32:%.*]] = zext i8 [[C]] to i32
+; CHECK-NEXT: [[D32:%.*]] = zext i8 [[D]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[C32]], 234
+; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i32 [[SUB]], [[A32]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul nuw nsw i32 [[C32]], [[D32]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[MUL1]], [[MUL2]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TRUNC]], 1234
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a32 = zext i8 %a to i32
+ %c32 = zext i8 %c to i32
+ %d32 = zext i8 %d to i32
+ %sub = xor i32 234, %c32
+ %mul1 = mul i32 %a32, %sub
+ %mul2 = mul i32 %c32, %d32
+ %add = add i32 %mul1, %mul2
+ ; We should keep the trunc in this case
+ %trunc = trunc i32 %add to i16
+ %cmp = icmp eq i16 %trunc, 1234
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
new file mode 100644
index 000000000000..953132309900
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p instcombine -S %s | FileCheck %s
+
+define i64 @test_sink_with_dereferenceable_assume(ptr %p, ptr %q, i1 %cond) {
+; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
+; CHECK-NEXT: ret i64 [[DIFF]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %p_int = ptrtoint ptr %p to i64
+ %q_int = ptrtoint ptr %q to i64
+ %diff = sub i64 %q_int, %p_int
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
+ br i1 %cond, label %then, label %else
+
+then:
+ ret i64 %diff
+
+else:
+ ret i64 0
+}
+
+declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
index 3c83c01929aa..7e58d9d6a8ec 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
@@ -59,13 +59,13 @@ define float @fmaxnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -112,27 +112,84 @@ exit:
ret float %max.next
}
+; TODO: Could fold pairs of `fcmp uno` together.
define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-LABEL: define float @test_fmax_and_fmin(
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]]
-; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
-; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP5]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD4]])
+; CHECK-NEXT: [[TMP6]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
+; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
+; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
+; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI2]], <4 x float> [[TMP4]]
+; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI3]], <4 x float> [[TMP5]]
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP19]], i64 [[IV]], i64 [[N_VEC]]
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP23]], <4 x float> [[TMP24]])
+; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX]])
+; CHECK-NEXT: [[RDX_MINMAX9:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP25]], <4 x float> [[TMP26]])
+; CHECK-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX9]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP19]], true
+; CHECK-NEXT: [[TMP31:%.*]] = and i1 [[CMP_N]], [[TMP30]]
+; CHECK-NEXT: br i1 [[TMP31]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
+; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_3]], align 4
; CHECK-NEXT: [[MAX_NEXT]] = tail call noundef float @llvm.maxnum.f32(float [[MAX]], float [[L_0]])
; CHECK-NEXT: [[MIN_NEXT]] = tail call noundef float @llvm.minnum.f32(float [[MIN]], float [[L_1]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP29]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ], [ [[TMP28]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[MAX_NEXT_LCSSA]], [[MIN_NEXT_LCSSA]]
; CHECK-NEXT: ret float [[SUB]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
index 711a9cd03ac1..1cc4c152649b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
@@ -59,13 +59,13 @@ define float @fminnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
index d80178fde45d..866487d2620e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
@@ -70,3 +70,28 @@ loop:
exit:
ret i32 %red.next
}
+
+define i16 @test_incomplete_chain_without_mul(ptr noalias %dst, ptr %A, ptr %B) #0 {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi i16 [ 0, %entry ], [ %red.next, %loop ]
+ %l.a = load i8, ptr %A, align 1
+ %a.ext = zext i8 %l.a to i16
+ store i16 %a.ext, ptr %dst, align 2
+ %l.b = load i8, ptr %B, align 1
+ %b.ext = zext i8 %l.b to i16
+ %add = add i16 %red, %b.ext
+ %add.1 = add i16 %add, %a.ext
+ %red.next = add i16 %add.1, %b.ext
+ %iv.next = add i64 %iv, 1
+ %ec = icmp ult i64 %iv, 1024
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret i16 %red.next
+}
+
+attributes #0 = { "target-cpu"="grace" }
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
deleted file mode 100644
index c05878995f47..000000000000
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ /dev/null
@@ -1,511 +0,0 @@
-; REQUIRES: asserts
-
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
-; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
-
-define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
-; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
- %1 = load i32, ptr %gep3, align 4
- %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
- %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %., ptr %gep11, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
-; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
- %1 = load i32, ptr %gep3, align 4
- %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
- %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %., ptr %gep11, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
-; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
- %1 = load i32, ptr %gep3, align 4
- %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
- %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %., ptr %gep11, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
-; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
- %1 = load i32, ptr %gep3, align 4
- %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
- %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %., ptr %gep11, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTLZ:%.+]]> = call llvm.ctlz(ir<[[LD1]]>, ir<true>)
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %1 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %0, i1 true)
- %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %1, ptr %gep3, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTTZ:%.+]]> = call llvm.cttz(ir<[[LD1]]>, ir<true>)
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %1 = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %0, i1 true)
- %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %1, ptr %gep3, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.lrint(ir<[[FPEXT]]>)
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LRINT]]> to i32
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds float, ptr %b, i64 %iv
- %0 = load float, ptr %gep, align 4
- %conv2 = fpext float %0 to double
- %1 = tail call i64 @llvm.lrint.i64.f64(double %conv2)
- %conv3 = trunc i64 %1 to i32
- %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %conv3, ptr %gep5, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.llrint(ir<[[FPEXT]]>)
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LLRINT]]> to i32
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds float, ptr %b, i64 %iv
- %0 = load float, ptr %gep, align 4
- %conv2 = fpext float %0 to double
- %1 = tail call i64 @llvm.llrint.i64.f64(double %conv2)
- %conv3 = trunc i64 %1 to i32
- %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %conv3, ptr %gep5, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_abs(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ABS:%.+]]> = call llvm.abs(ir<[[LD1]]>, ir<true>)
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %cond = tail call i32 @llvm.abs.i32(i32 %0, i1 true)
- %gep9 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %cond, ptr %gep9, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-declare i32 @llvm.smax.i32(i32, i32)
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.umax.i32(i32, i32)
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.ctlz.i32(i32, i1 immarg)
-declare i32 @llvm.cttz.i32(i32, i1 immarg)
-declare i64 @llvm.lrint.i64.f64(double)
-declare i64 @llvm.llrint.i64.f64(double)
-declare i32 @llvm.abs.i32(i32, i1 immarg)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
deleted file mode 100644
index 8d3fe484e646..000000000000
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ /dev/null
@@ -1,576 +0,0 @@
-; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
-; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
-
-define void @vp_sext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-; IF-EVL-NEXT: Successor(s): middle.block
-
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %conv2 = sext i32 %0 to i64
- %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv
- store i64 %conv2, ptr %gep4, align 8
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_zext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %conv2 = zext i32 %0 to i64
- %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv
- store i64 %conv2, ptr %gep4, align 8
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %conv2 = trunc i32 %0 to i16
- %gep4 = getelementptr inbounds i16, ptr %a, i64 %iv
- store i16 %conv2, ptr %gep4, align 2
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds float, ptr %b, i64 %iv
- %0 = load float, ptr %gep, align 4
- %conv2 = fpext float %0 to double
- %gep4 = getelementptr inbounds double, ptr %a, i64 %iv
- store double %conv2, ptr %gep4, align 8
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds double, ptr %b, i64 %iv
- %0 = load double, ptr %gep, align 8
- %conv2 = fptrunc double %0 to float
- %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
- store float %conv2, ptr %gep4, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %conv2 = sitofp i32 %0 to float
- %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
- store float %conv2, ptr %gep4, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = load i32, ptr %gep, align 4
- %conv2 = uitofp i32 %0 to float
- %gep4 = getelementptr inbounds float, ptr %a, i64 %iv
- store float %conv2, ptr %gep4, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptosi ir<[[LD1]]> to i32
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds float, ptr %b, i64 %iv
- %0 = load float, ptr %gep, align 4
- %conv2 = fptosi float %0 to i32
- %gep4 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %conv2, ptr %gep4, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOUI:%.+]]> = fptoui ir<[[LD1]]> to i32
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
- %gep = getelementptr inbounds float, ptr %b, i64 %iv
- %0 = load float, ptr %gep, align 4
- %conv2 = fptoui float %0 to i32
- %gep4 = getelementptr inbounds i32, ptr %a, i64 %iv
- store i32 %conv2, ptr %gep4, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-;
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
-; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[INTTOPTR:%.+]]> = inttoptr ir<[[LD1]]> to ptr
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
-; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
-; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
- %gep = getelementptr inbounds i64, ptr %b, i64 %iv
- %0 = load i64, ptr %gep, align 8
- %1 = inttoptr i64 %0 to ptr
- %gep2 = getelementptr inbounds ptr, ptr %a, i64 %iv
- store ptr %1, ptr %gep2, align 8
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
-
-define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
-; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
-
-; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
-; IF-EVL-NEXT: Live-in vp<[[VFUF:%.+]]> = VF * UF
-; IF-EVL-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
-; IF-EVL-NEXT: Live-in ir<[[N:%.+]]> = original trip-count
-
-; IF-EVL: vector.ph:
-; IF-EVL-NEXT: Successor(s): vector loop
-
-; IF-EVL: <x1> vector loop: {
-; IF-EVL-NEXT: vector.body:
-; IF-EVL-NEXT: EMIT vp<[[INDEX:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.+]]>
-; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[INDEX_EVL:%.+]]> = phi ir<0>, vp<[[INDEX_EVL_NEXT:%.+]]>
-; IF-EVL-NEXT: ir<[[IV:%.+]]> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[INDEX_EVL]]>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-GEP Inv[Var] ir<[[GEP:%.+]]> = getelementptr inbounds ir<%b>, ir<[[IV]]>
-; IF-EVL-NEXT: WIDEN-CAST ir<[[PTRTOINT:%.+]]> = ptrtoint ir<[[GEP]]> to i64
-; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[SCALAR_STEPS]]>
-; IF-EVL-NEXT: vp<[[VECTOR_PTR:%.+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[VECTOR_PTR]]>, ir<[[PTRTOINT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[ZEXT:%.+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-NEXT: EMIT vp<[[INDEX_EVL_NEXT]]> = add vp<[[ZEXT]]>, vp<[[INDEX_EVL]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[ZEXT]]>
-; IF-EVL-NEXT: EMIT vp<[[INDEX_NEXT]]> = add vp<[[INDEX]]>, vp<[[VFUF]]>
-; IF-EVL-NEXT: EMIT branch-on-count vp<[[INDEX_NEXT]]>, vp<[[VTC]]>
-; IF-EVL-NEXT: No successors
-; IF-EVL-NEXT: }
-; IF-EVL-NEXT: Successor(s): middle.block
-entry:
- br label %loop
-
-loop:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
- %gep = getelementptr inbounds i32, ptr %b, i64 %iv
- %0 = ptrtoint ptr %gep to i64
- %gep2 = getelementptr inbounds i64, ptr %a, i64 %iv
- store i64 %0, ptr %gep2, align 8
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %N
- br i1 %exitcond.not, label %exit, label %loop
-
-exit:
- ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
index af648df9fc5c..01fab87209a3 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
@@ -59,13 +59,13 @@ define float @fmaxnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -116,23 +116,79 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-LABEL: define float @test_fmax_and_fmin(
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]]
-; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
-; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP5]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD4]])
+; CHECK-NEXT: [[TMP6]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
+; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
+; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
+; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI2]], <4 x float> [[TMP4]]
+; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], <4 x float> [[VEC_PHI3]], <4 x float> [[TMP5]]
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP19]], i64 [[IV]], i64 [[N_VEC]]
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP23]], <4 x float> [[TMP24]])
+; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX]])
+; CHECK-NEXT: [[RDX_MINMAX9:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP25]], <4 x float> [[TMP26]])
+; CHECK-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX9]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP19]], true
+; CHECK-NEXT: [[TMP31:%.*]] = and i1 [[CMP_N]], [[TMP30]]
+; CHECK-NEXT: br i1 [[TMP31]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
+; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_3]], align 4
; CHECK-NEXT: [[MAX_NEXT]] = tail call noundef float @llvm.maxnum.f32(float [[MAX]], float [[L_0]])
; CHECK-NEXT: [[MIN_NEXT]] = tail call noundef float @llvm.minnum.f32(float [[MIN]], float [[L_1]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP29]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ], [ [[TMP28]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[MAX_NEXT_LCSSA]], [[MIN_NEXT_LCSSA]]
; CHECK-NEXT: ret float [[SUB]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
index 242df1fcf761..e028bec138fa 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
@@ -205,10 +205,10 @@ define float @fmaxnum_1(ptr %src, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -270,10 +270,10 @@ define float @fmaxnum_2(ptr %src, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -337,10 +337,10 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -405,10 +405,10 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -680,23 +680,62 @@ define float @test_fmax_and_fmax(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-LABEL: define float @test_fmax_and_fmax(
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MAX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]]
-; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
-; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD2]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP11]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP9]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP9]], i64 [[IV]], i64 [[N_VEC]]
+; CHECK-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP13]])
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP14]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP9]], true
+; CHECK-NEXT: [[TMP19:%.*]] = and i1 [[CMP_N]], [[TMP18]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP15]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX3:%.*]] = phi float [ [[TMP17]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX3]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
+; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_3]], align 4
; CHECK-NEXT: [[MAX_NEXT]] = tail call noundef float @llvm.maxnum.f32(float [[MAX]], float [[L_0]])
; CHECK-NEXT: [[MIN_NEXT]] = tail call noundef float @llvm.minnum.f32(float [[MIN]], float [[L_1]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[MIN_NEXT_LCSSA:%.*]] = phi float [ [[MIN_NEXT]], %[[LOOP]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[MAX_NEXT_LCSSA]], [[MIN_NEXT_LCSSA]]
; CHECK-NEXT: ret float [[SUB]]
;
diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll
index 7f65306bcbe5..368553dc2a7d 100644
--- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll
@@ -205,10 +205,10 @@ define float @fminnum_1(ptr %src, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -270,10 +270,10 @@ define float @fminnum_2(ptr %src, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
index 338d9259b635..33e3e83770e7 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
@@ -49,10 +49,10 @@ entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 256) ]
%start.ptr = load ptr, ptr %first, align 8
%1 = load i64, ptr %first, align 8
- %coerce.val.pi.i = add i64 %1, 256
- %coerce.val.ip = inttoptr i64 %coerce.val.pi.i to ptr
- %cmp.not6.i.i = icmp eq ptr %start.ptr, %coerce.val.ip
- br i1 %cmp.not6.i.i, label %return, label %loop.ph
+ %coerce.val.p = add i64 %1, 256
+ %coerce.val.ip = inttoptr i64 %coerce.val.p to ptr
+ %ec6. = icmp eq ptr %start.ptr, %coerce.val.ip
+ br i1 %ec6., label %return, label %loop.ph
loop.ph:
%2 = load i16, ptr %s.addr, align 2
@@ -61,13 +61,13 @@ loop.ph:
loop.header:
%ptr.iv = phi ptr [ %start.ptr, %loop.ph ], [ %ptr.iv.next, %loop.latch ]
%3 = load i16, ptr %ptr.iv, align 2
- %cmp2.i.i = icmp eq i16 %3, %2
- br i1 %cmp2.i.i, label %return, label %loop.latch
+ %cmp2. = icmp eq i16 %3, %2
+ br i1 %cmp2., label %return, label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2
- %cmp.not.i.i = icmp eq ptr %ptr.iv.next, %coerce.val.ip
- br i1 %cmp.not.i.i, label %return, label %loop.header
+ %ec. = icmp eq ptr %ptr.iv.next, %coerce.val.ip
+ br i1 %ec., label %return, label %loop.header
return:
%merge = phi ptr [ %start.ptr, %entry ], [ %coerce.val.ip, %loop.latch ], [ %ptr.iv, %loop.header ]
@@ -103,10 +103,10 @@ entry:
%0 = load ptr, ptr %first, align 8
%start.ptr = load ptr, ptr %first, align 8
%1 = load i64, ptr %first, align 8
- %coerce.val.pi.i = add i64 %1, 256
- %coerce.val.ip = inttoptr i64 %coerce.val.pi.i to ptr
- %cmp.not6.i.i = icmp eq ptr %start.ptr, %coerce.val.ip
- br i1 %cmp.not6.i.i, label %return, label %loop.ph
+ %coerce.val.p = add i64 %1, 256
+ %coerce.val.ip = inttoptr i64 %coerce.val.p to ptr
+ %ec6. = icmp eq ptr %start.ptr, %coerce.val.ip
+ br i1 %ec6., label %return, label %loop.ph
loop.ph:
%2 = load i16, ptr %s.addr, align 2
@@ -115,13 +115,13 @@ loop.ph:
loop.header:
%ptr.iv = phi ptr [ %start.ptr, %loop.ph ], [ %ptr.iv.next, %loop.latch ]
%3 = load i16, ptr %ptr.iv, align 2
- %cmp2.i.i = icmp eq i16 %3, %2
- br i1 %cmp2.i.i, label %return, label %loop.latch
+ %cmp2. = icmp eq i16 %3, %2
+ br i1 %cmp2., label %return, label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2
- %cmp.not.i.i = icmp eq ptr %ptr.iv.next, %coerce.val.ip
- br i1 %cmp.not.i.i, label %return, label %loop.header
+ %ec. = icmp eq ptr %ptr.iv.next, %coerce.val.ip
+ br i1 %ec., label %return, label %loop.header
return:
%merge = phi ptr [ %start.ptr, %entry ], [ %coerce.val.ip, %loop.latch ], [ %ptr.iv, %loop.header ]
@@ -129,9 +129,118 @@ return:
ret i64 %res
}
+define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) {
+; CHECK-LABEL: define noundef ptr @std_find_caller(
+; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ]
+; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
+; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]]
+; CHECK: [[LOOP_HEADER_I_PREHEADER]]:
+; CHECK-NEXT: [[LAST2:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[FIRST1:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST2]], -2
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP1]], 6
+; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP4]], 6
+; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT:.*]], label %[[LOOP_HEADER_I_PROL:.*]]
+; CHECK: [[LOOP_HEADER_I_PROL]]:
+; CHECK-NEXT: [[PTR_IV_I_PROL:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_PROL:%.*]], %[[LOOP_LATCH_I_PROL:.*]] ], [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_I_PROL]] ], [ 0, %[[LOOP_HEADER_I_PREHEADER]] ]
+; CHECK-NEXT: [[L_I_PROL:%.*]] = load i16, ptr [[PTR_IV_I_PROL]], align 2
+; CHECK-NEXT: [[C_1_I_PROL:%.*]] = icmp eq i16 [[L_I_PROL]], 1
+; CHECK-NEXT: br i1 [[C_1_I_PROL]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I_PROL]]
+; CHECK: [[LOOP_LATCH_I_PROL]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_PROL]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I_PROL]], i64 2
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT]], label %[[LOOP_HEADER_I_PROL]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[LOOP_HEADER_I_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[PTR_IV_I_UNR:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[PTR_IV_NEXT_I_PROL]], %[[LOOP_LATCH_I_PROL]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP1]], 6
+; CHECK-NEXT: br i1 [[TMP5]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I:.*]]
+; CHECK: [[LOOP_HEADER_I]]:
+; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_3:%.*]], %[[LOOP_LATCH_I_3:.*]] ], [ [[PTR_IV_I_UNR]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ]
+; CHECK-NEXT: [[L_I:%.*]] = load i16, ptr [[PTR_IV_I]], align 2
+; CHECK-NEXT: [[C_1_I:%.*]] = icmp eq i16 [[L_I]], 1
+; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I:.*]]
+; CHECK: [[LOOP_LATCH_I]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
+; CHECK-NEXT: [[L_I_1:%.*]] = load i16, ptr [[PTR_IV_NEXT_I]], align 2
+; CHECK-NEXT: [[C_1_I_1:%.*]] = icmp eq i16 [[L_I_1]], 1
+; CHECK-NEXT: br i1 [[C_1_I_1]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11:.*]], label %[[LOOP_LATCH_I_1:.*]]
+; CHECK: [[LOOP_LATCH_I_1]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4
+; CHECK-NEXT: [[L_I_2:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_1]], align 2
+; CHECK-NEXT: [[C_1_I_2:%.*]] = icmp eq i16 [[L_I_2]], 1
+; CHECK-NEXT: br i1 [[C_1_I_2]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9:.*]], label %[[LOOP_LATCH_I_2:.*]]
+; CHECK: [[LOOP_LATCH_I_2]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6
+; CHECK-NEXT: [[L_I_3:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_2]], align 2
+; CHECK-NEXT: [[C_1_I_3:%.*]] = icmp eq i16 [[L_I_3]], 1
+; CHECK-NEXT: br i1 [[C_1_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7:.*]], label %[[LOOP_LATCH_I_3]]
+; CHECK: [[LOOP_LATCH_I_3]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 8
+; CHECK-NEXT: [[EC_I_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I_3]], [[LAST]]
+; CHECK-NEXT: br i1 [[EC_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]]
+; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_2_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6
+; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
+; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_1_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4
+; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
+; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]]:
+; CHECK-NEXT: [[PTR_IV_NEXT_I_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
+; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
+; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT]]:
+; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ], [ [[PTR_IV_NEXT_I_2_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]] ], [ [[PTR_IV_NEXT_I_1_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]] ], [ [[PTR_IV_NEXT_I_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I_3]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ], [ [[PTR_IV_I_PROL]], %[[LOOP_HEADER_I_PROL]] ]
+; CHECK-NEXT: ret ptr [[RES_I]]
+;
+entry:
+ %last.i64 = ptrtoint ptr %last to i64
+ %first.i64 = ptrtoint ptr %first to i64
+ %ptr.sub = sub i64 %last.i64, %first.i64
+ call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %last, i64 2) ]
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %first, i64 %ptr.sub) ]
+ %call = call noundef ptr @std_find_generic_impl(ptr noundef nonnull %first, ptr noundef %last, i16 noundef signext 1)
+ ret ptr %call
+}
+
+define linkonce_odr noundef ptr @std_find_generic_impl(ptr noundef %first, ptr noundef %last, i16 noundef %value) {
+entry:
+ %pre = icmp eq ptr %first, %last
+ br i1 %pre, label %exit, label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %first, %entry ]
+ %l = load i16, ptr %ptr.iv, align 2
+ %c.1 = icmp eq i16 %l, %value
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2
+ %ec = icmp eq ptr %ptr.iv.next, %last
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %first, %entry ], [ %ptr.iv, %loop.header ], [ %ptr.iv.next, %loop.latch ]
+ ret ptr %res
+}
+
declare void @llvm.assume(i1 noundef)
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"}
;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
index 2c1d73eaafc5..9f3244ded92f 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
@@ -498,11 +498,9 @@ define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %
; PR58139
define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @_mm_complexmult_pd_naive(
-; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
-; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
-; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
+; SSE-NEXT: [[TMP3:%.*]] = fneg <2 x double> [[B:%.*]]
+; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[B]], <2 x i32> <i32 1, i32 2>
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
index fa6403f3d426..de64bf2657f7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
@@ -502,11 +502,9 @@ define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %
; PR58139
define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @_mm_complexmult_pd_naive(
-; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1
-; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
-; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0
+; SSE-NEXT: [[TMP3:%.*]] = fneg <2 x double> [[B:%.*]]
+; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[B]], <2 x i32> <i32 1, i32 2>
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer
; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
new file mode 100644
index 000000000000..65975199e46b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define <4 x i32> @test() {
+; CHECK-LABEL: define <4 x i32> @test() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TRUNC]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[TRUNC]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 0, i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[OR]] to i64
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+bb:
+ %trunc = trunc i64 0 to i32
+ br label %bb1
+
+bb1:
+ %or = or i32 %trunc, 0
+ %zext = zext i32 %or to i64
+ %and = and i32 0, 0
+ %or2 = or i32 %trunc, 0
+ br label %bb3
+
+bb3:
+ %0 = insertelement <4 x i32> zeroinitializer, i32 %trunc, i32 0
+ %1 = insertelement <4 x i32> %0, i32 %and, i32 1
+ %2 = insertelement <4 x i32> %1, i32 %or2, i32 2
+ %3 = insertelement <4 x i32> %2, i32 %or, i32 3
+ ret <4 x i32> %3
+}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index 5358e0419e7a..88fcf359f7c8 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -58,6 +58,19 @@ define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
+define <2 x float> @ext2_v4f32v2f32(<4 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @ext2_v4f32v2f32(
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 poison, i32 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %e = extractelement <4 x float> %x, i32 3
+ %n = fneg float %e
+ %r = insertelement <2 x float> %y, float %n, i32 1
+ ret <2 x float> %r
+}
+
; Eliminating extract/insert is still profitable. Flags propagate.
define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -73,17 +86,11 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
}
define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
-; SSE-LABEL: @ext1_v2f64v4f64(
-; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
-; SSE-NEXT: ret <4 x double> [[R]]
-;
-; AVX-LABEL: @ext1_v2f64v4f64(
-; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; AVX-NEXT: ret <4 x double> [[R]]
+; CHECK-LABEL: @ext1_v2f64v4f64(
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x double> [[R]]
;
%e = extractelement <2 x double> %x, i32 1
%n = fneg nsz double %e
@@ -91,6 +98,19 @@ define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
ret <4 x double> %r
}
+define <2 x double> @ext1_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @ext1_v4f64v2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %e = extractelement <4 x double> %x, i32 3
+ %n = fneg nsz double %e
+ %r = insertelement <2 x double> %y, double %n, i32 1
+ ret <2 x double> %r
+}
+
define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
@@ -105,9 +125,9 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v4f32v8f32(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: ret <8 x float> [[R]]
;
%e = extractelement <4 x float> %x, i32 3
@@ -116,6 +136,19 @@ define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <4 x float> @ext7_v8f32v4f32(<8 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @ext7_v8f32v4f32(
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <8 x float> %x, i32 7
+ %n = fneg float %e
+ %r = insertelement <4 x float> %y, float %n, i32 3
+ ret <4 x float> %r
+}
+
; Same as above with an extra use of the extracted element.
define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -141,12 +174,20 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
}
define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32_use1(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT: call void @use(float [[E]])
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
-; CHECK-NEXT: ret <8 x float> [[R]]
+; SSE-LABEL: @ext7_v4f32v8f32_use1(
+; SSE-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; SSE-NEXT: call void @use(float [[E]])
+; SSE-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X]]
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT: ret <8 x float> [[R]]
+;
+; AVX-LABEL: @ext7_v4f32v8f32_use1(
+; AVX-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; AVX-NEXT: call void @use(float [[E]])
+; AVX-NEXT: [[N:%.*]] = fneg float [[E]]
+; AVX-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
+; AVX-NEXT: ret <8 x float> [[R]]
;
%e = extractelement <4 x float> %x, i32 3
call void @use(float %e)
@@ -155,6 +196,29 @@ define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <4 x float> @ext7_v8f32v4f32_use1(<8 x float> %x, <4 x float> %y) {
+; SSE-LABEL: @ext7_v8f32v4f32_use1(
+; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
+; SSE-NEXT: call void @use(float [[E]])
+; SSE-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X]]
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7>
+; SSE-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; SSE-NEXT: ret <4 x float> [[R]]
+;
+; AVX-LABEL: @ext7_v8f32v4f32_use1(
+; AVX-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
+; AVX-NEXT: call void @use(float [[E]])
+; AVX-NEXT: [[N:%.*]] = fneg float [[E]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3
+; AVX-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <8 x float> %x, i32 7
+ call void @use(float %e)
+ %n = fneg float %e
+ %r = insertelement <4 x float> %y, float %n, i32 3
+ ret <4 x float> %r
+}
+
; Negative test - the transform is likely not profitable if the fneg has another use.
define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -187,6 +251,21 @@ define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <4 x float> @ext7_v8f32v4f32_use2(<8 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @ext7_v8f32v4f32_use2(
+; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: call void @use(float [[N]])
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <8 x float> %x, i32 7
+ %n = fneg float %e
+ call void @use(float %n)
+ %r = insertelement <4 x float> %y, float %n, i32 3
+ ret <4 x float> %r
+}
+
; Negative test - can't convert variable index to a shuffle.
define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -215,14 +294,10 @@ define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y,
ret <4 x double> %r
}
-; Negative test - require same extract/insert index for simple shuffle.
-; TODO: We could handle this by adjusting the cost calculation.
-
define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext1_v2f64_ins0(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 3, i32 1>
; CHECK-NEXT: ret <2 x double> [[R]]
;
%e = extractelement <2 x double> %x, i32 1
@@ -231,12 +306,11 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-; Negative test - extract from an index greater than the vector width of the destination
define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext3_v4f64v2f64(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x double> [[R]]
;
%e = extractelement <4 x double> %x, i32 3
@@ -246,11 +320,17 @@ define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
}
define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
-; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
-; CHECK-NEXT: ret <4 x double> [[R]]
+; SSE-LABEL: @ext1_v2f64v4f64_ins0(
+; SSE-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; SSE-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+; SSE-NEXT: ret <4 x double> [[R]]
+;
+; AVX-LABEL: @ext1_v2f64v4f64_ins0(
+; AVX-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
+; AVX-NEXT: [[N:%.*]] = fneg nsz double [[E]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
+; AVX-NEXT: ret <4 x double> [[R]]
;
%e = extractelement <2 x double> %x, i32 1
%n = fneg nsz double %e
diff --git a/llvm/test/Verifier/reloc-none.ll b/llvm/test/Verifier/reloc-none.ll
new file mode 100644
index 000000000000..9c96799a36a3
--- /dev/null
+++ b/llvm/test/Verifier/reloc-none.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as -disable-output 2>&1 %s | FileCheck %s
+
+; CHECK: llvm.reloc.none argument must be a metadata string
+; CHECK-NEXT: call void @llvm.reloc.none(metadata !0)
+
+define void @test_reloc_none_bad_arg() {
+ call void @llvm.reloc.none(metadata !0)
+ ret void
+}
+
+declare void @llvm.reloc.none(metadata)
+
+!0 = !{}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 35ea8b84d7ec..d30cd2002d36 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -233,6 +233,7 @@ tools.extend(
"llvm-addr2line",
"llvm-bcanalyzer",
"llvm-bitcode-strip",
+ "llvm-cas",
"llvm-cgdata",
"llvm-config",
"llvm-cov",
@@ -796,6 +797,9 @@ if config.have_opt_viewer_modules:
if config.expensive_checks:
config.available_features.add("expensive_checks")
+if config.have_ondisk_cas:
+ config.available_features.add("ondisk_cas")
+
if "MemoryWithOrigins" in config.llvm_use_sanitizer:
config.available_features.add("use_msan_with_origins")
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 973e0ec934a5..c5cb7160a3d4 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -66,6 +66,7 @@ config.spirv_tools_tests = @LLVM_INCLUDE_SPIRV_TOOLS_TESTS@
config.have_vc_rev = @LLVM_APPEND_VC_REV@
config.force_vc_rev = "@LLVM_FORCE_VC_REVISION@"
config.has_logf128 = @LLVM_HAS_LOGF128@
+config.have_ondisk_cas = @LLVM_ENABLE_ONDISK_CAS@
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/llvm/test/tools/dsymutil/Inputs/typedefs-with-same-name.o b/llvm/test/tools/dsymutil/Inputs/typedefs-with-same-name.o
new file mode 100644
index 000000000000..6cc47c1a783b
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/typedefs-with-same-name.o
Binary files differ
diff --git a/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-fwd-declaration.test b/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-fwd-declaration.test
index d028194f7e83..fd15ce3e1897 100644
--- a/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-fwd-declaration.test
+++ b/llvm/test/tools/dsymutil/X86/DWARFLinkerParallel/odr-fwd-declaration.test
@@ -35,14 +35,14 @@ void foo() { Sptrptr ptr1 = 0; }
// CHECK: DW_TAG_member
// CHECK-NEXT: DW_AT_name{{.*}}"field"
-// CHECK: 0x[[TYPEDEF_PTR_S]]: DW_TAG_typedef
-// CHECK-NEXT: DW_AT_type{{.*}}{0x[[PTR_S]]} "S *"
-// CHECK-NEXT: DW_AT_name{{.*}}"Sptr"
-
// CHECK: 0x[[TYPEDEF_PTR_PTR_S:[a-f0-9]*]]: DW_TAG_typedef
// CHECK-NEXT: DW_AT_type{{.*}}{0x[[PTR_PTR_S]]} "Sptr *"
// CHECK-NEXT: DW_AT_name{{.*}}"Sptrptr"
+// CHECK: 0x[[TYPEDEF_PTR_S]]: DW_TAG_typedef
+// CHECK-NEXT: DW_AT_type{{.*}}{0x[[PTR_S]]} "S *"
+// CHECK-NEXT: DW_AT_name{{.*}}"Sptr"
+
// First we confirm that first compile unit properly references type.
//
// CHECK: DW_TAG_compile_unit
diff --git a/llvm/test/tools/dsymutil/typedefs-with-same-name.test b/llvm/test/tools/dsymutil/typedefs-with-same-name.test
new file mode 100644
index 000000000000..2312f820b045
--- /dev/null
+++ b/llvm/test/tools/dsymutil/typedefs-with-same-name.test
@@ -0,0 +1,41 @@
+#RUN: dsymutil --linker=parallel -f -oso-prepend-path=%p/Inputs/ -y %s -o %t.dwarf
+#RUN: llvm-dwarfdump %t.dwarf | FileCheck %s
+
+# There should be two typedef DIE named "BarInt" in the resultant .dwarf file.
+# The second should refer to the first, which refer to "Foo<int>".
+# CHECK: 0x[[FIRST_BARINT_ADDR:[0-9a-f]*]]: DW_TAG_typedef
+# CHECK-NEXT: DW_AT_type (0x{{([[:xdigit:]]*)}} "Foo<int>")
+# CHECK-NEXT: DW_AT_name ("BarInt")
+# CHECK: 0x{{([[:xdigit:]]*)}}: DW_TAG_typedef
+# CHECK-NEXT: DW_AT_type (0x[[FIRST_BARINT_ADDR]] "BarInt")
+# CHECK-NEXT: DW_AT_name ("BarInt")
+
+# Source:
+#
+# template <typename T> struct Foo;
+# typedef Foo<int> BarInt;
+# template <typename T>
+# struct [[clang::preferred_name(BarInt)]] Foo{};
+# int main() {
+# BarInt barInt;
+# return 0;
+# }
+#
+# Compile with:
+#
+# $ clang++ -g -O0 -c typedefs-with-same-name.cpp -o typedefs-with-same-name.o
+#
+# To generate the debug map:
+#
+# $ clang++ typedefs-with-same-name.o -o typedefs-with-same-name
+# $ dsymutil -dump-debug-map typedefs-with-same-name
+
+---
+triple: 'arm64-apple-darwin'
+objects:
+ - filename: '/typedefs-with-same-name.o'
+ timestamp: 1762438746
+ type: 102
+ symbols:
+ - { sym: _main, objAddr: 0x0, binAddr: 0x100000360, size: 0x14 }
+...
diff --git a/llvm/test/tools/dxil-dis/llvm_assume.ll b/llvm/test/tools/dxil-dis/llvm_assume.ll
deleted file mode 100644
index f5be66c0d192..000000000000
--- a/llvm/test/tools/dxil-dis/llvm_assume.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc --filetype=obj %s -o - | dxil-dis -o - | FileCheck %s
-
-target triple = "dxil-pc-shadermodel6.7-library"
-
-define void @test_llvm_assume(i1 %0) {
-; CHECK-LABEL: test_llvm_assume
-; CHECK-NEXT: tail call void @llvm.assume(i1 %0)
-tail call void @llvm.assume(i1 %0)
-ret void
-}
-
diff --git a/llvm/test/tools/llvm-cas/Inputs/oneline b/llvm/test/tools/llvm-cas/Inputs/oneline
new file mode 100644
index 000000000000..d95f3ad14dee
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/Inputs/oneline
@@ -0,0 +1 @@
+content
diff --git a/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline b/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline
new file mode 100644
index 000000000000..6b584e8ece56
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline
@@ -0,0 +1 @@
+content \ No newline at end of file
diff --git a/llvm/test/tools/llvm-cas/action-cache.test b/llvm/test/tools/llvm-cas/action-cache.test
new file mode 100644
index 000000000000..fcb212c24e21
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/action-cache.test
@@ -0,0 +1,14 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data %S/Inputs/oneline >%t/oneline.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid
+
+RUN: llvm-cas --cas %t.cas --put-cache-key @%t/oneline.casid @%t/oneline-nonewline.casid
+RUN: llvm-cas --cas %t.cas --get-cache-result @%t/oneline.casid > %t/result.casid
+RUN: diff %t/oneline-nonewline.casid %t/result.casid
+
+RUN: not llvm-cas --cas %t.cas --get-cache-result @%t/oneline-nonewline.casid 2>&1 | FileCheck %s
+CHECK: result not found
diff --git a/llvm/test/tools/llvm-cas/cache.test b/llvm/test/tools/llvm-cas/cache.test
new file mode 100644
index 000000000000..f0ce69190d41
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/cache.test
@@ -0,0 +1,14 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data /dev/null > %t/empty.casid
+RUN: echo "abc" | \
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data - >%t/abc.casid
+
+RUN: llvm-cas --cas %t/cas --put-cache-key @%t/abc.casid @%t/empty.casid
+RUN: llvm-cas --cas %t/cas --get-cache-result @%t/abc.casid > %t/empty2.casid
+RUN: diff %t/empty.casid %t/empty2.casid
+
+RUN: not llvm-cas --cas %t/cas --get-cache-result @%t/empty.casid
diff --git a/llvm/test/tools/llvm-cas/dump.test b/llvm/test/tools/llvm-cas/dump.test
new file mode 100644
index 000000000000..f23bac6cdf84
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/dump.test
@@ -0,0 +1,27 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t/cas --make-blob \
+RUN: --data - </dev/null
+
+RUN: llvm-cas --cas %t/cas --make-blob \
+RUN: --data %s
+
+RUN: llvm-cas --cas %t/cas --dump | FileCheck %s
+
+// check the dump format.
+CHECK: index:
+CHECK-NEXT: hash-num-bits=
+CHECK-NEXT: root addr=
+// it should has at least one index
+CHECK-NEXT: - index=
+
+// two records
+CHECK: record
+CHECK-NEXT: - addr=
+CHECK-NEXT: - addr=
+
+// both should be small enough to be in data pool
+CHECK: pool:
+CHECK-NEXT: - addr=
+CHECK-NEXT: - addr=
diff --git a/llvm/test/tools/llvm-cas/lit.local.cfg b/llvm/test/tools/llvm-cas/lit.local.cfg
new file mode 100644
index 000000000000..379945b68925
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/lit.local.cfg
@@ -0,0 +1,2 @@
+if not config.have_ondisk_cas:
+ config.unsupported = True
diff --git a/llvm/test/tools/llvm-cas/make-blob.test b/llvm/test/tools/llvm-cas/make-blob.test
new file mode 100644
index 000000000000..532a3a3351f8
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/make-blob.test
@@ -0,0 +1,41 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data - </dev/null >%t/empty.casid
+RUN: sed -e 's,^.,CHECK: ,' <%t/empty.casid >%t/empty.check
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data /dev/null | FileCheck %t/empty.check
+RUN: echo "abc" | \
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data - >%t/abc.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data %S/Inputs/oneline >%t/oneline.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN: --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/empty.casid |\
+RUN: FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+CHECK-EMPTY-NOT: {{.}}
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/abc.casid |\
+RUN: FileCheck %s -check-prefix CHECK-ABC
+CHECK-ABC: abc
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline-nonewline.casid |\
+RUN: FileCheck %s -check-prefix CHECK-ONELINE
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid |\
+RUN: FileCheck %s -check-prefix CHECK-ONELINE
+CHECK-ONELINE: content
+
+# Double-check newlines.
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline-nonewline.casid \
+RUN: >%t/oneline-nonewline
+RUN: diff %S/Inputs/oneline-nonewline %t/oneline-nonewline
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid \
+RUN: >%t/oneline
+RUN: diff %S/Inputs/oneline %t/oneline
+
+# Validate
+RUN: llvm-cas --cas %t.cas --validate-object @%t/oneline-nonewline.casid
+RUN: llvm-cas --cas %t.cas --validate-object @%t/oneline.casid
diff --git a/llvm/test/tools/llvm-cas/make-node.test b/llvm/test/tools/llvm-cas/make-node.test
new file mode 100644
index 000000000000..de548af8fa2b
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/make-node.test
@@ -0,0 +1,37 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+# Make some empty objects.
+RUN: llvm-cas --cas %t/cas --make-node \
+RUN: --data - </dev/null >%t/empty.casid
+
+RUN: llvm-cas --cas %t/cas --cat-node-data @%t/empty.casid |\
+RUN: FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/empty.casid |\
+RUN: FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+CHECK-EMPTY-NOT: {{.}}
+
+# Make a complex object, which references existing ones. Reference a blob and
+# other objects, and reference one of them twice to be sure they don't get
+# deduped.
+RUN: llvm-cas --cas %t/cas --make-blob --data /dev/null \
+RUN: >%t/empty-blob.casid
+RUN: cat %t/empty.casid %t/empty.casid %t/empty-blob.casid \
+RUN: >%t/complex.refs
+RUN: cat %t/complex.refs | sed -e 's,^.,CHECK: ,' > %t/complex.check
+RUN: llvm-cas --cas %t/cas --make-node \
+RUN: --data %S/Inputs/oneline @%t/complex.refs \
+RUN: >%t/complex.casid
+RUN: llvm-cas --cas %t/cas --cat-node-data \
+RUN: @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-DATA
+RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/complex.casid |\
+RUN: FileCheck %t/complex.check
+COMPLEX-DATA: content
+
+RUN: llvm-cas --cas %t/cas --validate-object @%t/complex.casid
+
+# Import from a new CAS.
+RUN: llvm-cas --cas %t/new-cas --upstream-cas %t/cas --import @%t/complex.casid
+RUN: llvm-cas --cas %t/new-cas --cat-node-data \
+RUN: @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-DATA
+RUN: llvm-cas --cas %t/new-cas --validate
diff --git a/llvm/test/tools/llvm-cas/print-id.test b/llvm/test/tools/llvm-cas/print-id.test
new file mode 100644
index 000000000000..5a2efd58dde1
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/print-id.test
@@ -0,0 +1,13 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t/cas --make-blob --data %S/Inputs/oneline > %t/id
+
+# Confirm that the ID has the right prefix, is well-formed, and that there's
+# nothing else on the line.
+RUN: FileCheck %s --match-full-lines --strict-whitespace <%t/id
+CHECK:llvmcas://{{[a-z0-9]+}}
+
+# Confirm that there's a newline after.
+RUN: wc -l <%t/id | FileCheck %s -check-prefix=NEWLINE
+NEWLINE: 1
diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test
new file mode 100644
index 000000000000..13f24f087346
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/validation.test
@@ -0,0 +1,31 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+# Ingest a blob which just fits inside the CAS data pool to make sure the validate passes.
+RUN: truncate -s 7 %t/file
+RUN: cat %t/file | \
+RUN: llvm-cas --cas %t/cas --make-blob \
+RUN: --data -
+RUN: llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: llvm-cas --cas %t/cas --validate
+RUN: llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: rm %t/cas/v1.1/data.v1
+RUN: not llvm-cas --cas %t/cas --validate
+RUN: not llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: mkdir %t/ac
+
+RUN: llvm-cas --cas %t/ac --make-blob \
+RUN: --data /dev/null > %t/empty.casid
+RUN: echo "abc" | \
+RUN: llvm-cas --cas %t/ac --make-blob \
+RUN: --data - >%t/abc.casid
+
+RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
+RUN: llvm-cas --cas %t/ac --validate
+# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
+# allocated record, leaving it invalid.
+RUN: truncate -s -40 %t/ac/v1.1/actions.v1
+RUN: not llvm-cas --cas %t/ac --validate
diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
index 141a56ad1090..ec061ff9185f 100644
--- a/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
+++ b/llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt
@@ -1,33 +1,33 @@
MAX_RELATION=4
-187 7051 1
-187 6948 2
-187 187 0
-187 7051 1
+187 7052 1
187 6949 2
+187 187 0
+187 7052 1
+187 6950 2
187 10 0
-10 7051 1
-10 7051 2
-10 7051 3
-10 6941 4
+10 7052 1
+10 7052 2
+10 7052 3
+10 6942 4
10 187 0
-187 6932 1
-187 7051 2
-187 1543 0
-1543 6862 1
-1543 6932 2
-187 7051 1
-187 6948 2
-187 187 0
-187 7051 1
+187 6933 1
+187 7052 2
+187 1544 0
+1544 6863 1
+1544 6933 2
+187 7052 1
187 6949 2
+187 187 0
+187 7052 1
+187 6950 2
187 601 0
-601 7051 1
-601 7051 2
-601 7051 3
-601 6941 4
+601 7052 1
+601 7052 2
+601 7052 3
+601 6942 4
601 187 0
-187 6932 1
-187 7051 2
-187 1543 0
-1543 6862 1
-1543 6932 2
+187 6933 1
+187 7052 2
+187 1544 0
+1544 6863 1
+1544 6933 2
diff --git a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
index dbbbbc746a76..1b90a8a75a80 100644
--- a/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
+++ b/llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt
@@ -1,4 +1,4 @@
-7151
+7152
AAA 0
AAD 1
AADD 2
@@ -1532,5621 +1532,5622 @@ RDSSPQ 1529
RDTSC 1530
RDTSCP 1531
REG_SEQUENCE 1532
-REPNE_PREFIX 1533
-REP_MOVSB 1534
-REP_MOVSD 1535
-REP_MOVSQ 1536
-REP_MOVSW 1537
-REP_PREFIX 1538
-REP_STOSB 1539
-REP_STOSD 1540
-REP_STOSQ 1541
-REP_STOSW 1542
-RET 1543
-RETI 1544
-REX 1545
-RMPADJUST 1546
-RMPQUERY 1547
-RMPUPDATE 1548
-ROL 1549
-ROR 1550
-RORX 1551
-ROUNDPDmi 1552
-ROUNDPDri 1553
-ROUNDPSmi 1554
-ROUNDPSri 1555
-ROUNDSDmi 1556
-ROUNDSDmi_Int 1557
-ROUNDSDri 1558
-ROUNDSDri_Int 1559
-ROUNDSSmi 1560
-ROUNDSSmi_Int 1561
-ROUNDSSri 1562
-ROUNDSSri_Int 1563
-RSM 1564
-RSQRTPSm 1565
-RSQRTPSr 1566
-RSQRTSSm 1567
-RSQRTSSm_Int 1568
-RSQRTSSr 1569
-RSQRTSSr_Int 1570
-RSTORSSP 1571
-SAHF 1572
-SALC 1573
-SAR 1574
-SARX 1575
-SAVEPREVSSP 1576
-SBB 1577
-SCASB 1578
-SCASL 1579
-SCASQ 1580
-SCASW 1581
-SEAMCALL 1582
-SEAMOPS 1583
-SEAMRET 1584
-SEG_ALLOCA 1585
-SEH_BeginEpilogue 1586
-SEH_EndEpilogue 1587
-SEH_EndPrologue 1588
-SEH_PushFrame 1589
-SEH_PushReg 1590
-SEH_SaveReg 1591
-SEH_SaveXMM 1592
-SEH_SetFrame 1593
-SEH_StackAlign 1594
-SEH_StackAlloc 1595
-SEH_UnwindV 1596
-SEH_UnwindVersion 1597
-SENDUIPI 1598
-SERIALIZE 1599
-SETB_C 1600
-SETCCm 1601
-SETCCm_EVEX 1602
-SETCCr 1603
-SETCCr_EVEX 1604
-SETSSBSY 1605
-SETZUCCm 1606
-SETZUCCr 1607
-SFENCE 1608
-SGDT 1609
-SHA 1610
-SHL 1611
-SHLD 1612
-SHLDROT 1613
-SHLX 1614
-SHR 1615
-SHRD 1616
-SHRDROT 1617
-SHRX 1618
-SHUFPDrmi 1619
-SHUFPDrri 1620
-SHUFPSrmi 1621
-SHUFPSrri 1622
-SIDT 1623
-SKINIT 1624
-SLDT 1625
-SLWPCB 1626
-SMSW 1627
-SQRTPDm 1628
-SQRTPDr 1629
-SQRTPSm 1630
-SQRTPSr 1631
-SQRTSDm 1632
-SQRTSDm_Int 1633
-SQRTSDr 1634
-SQRTSDr_Int 1635
-SQRTSSm 1636
-SQRTSSm_Int 1637
-SQRTSSr 1638
-SQRTSSr_Int 1639
-SQRT_F 1640
-SQRT_Fp 1641
-SS_PREFIX 1642
-STAC 1643
-STACKALLOC_W_PROBING 1644
-STACKMAP 1645
-STATEPOINT 1646
-STC 1647
-STD 1648
-STGI 1649
-STI 1650
-STMXCSR 1651
-STOSB 1652
-STOSL 1653
-STOSQ 1654
-STOSW 1655
-STR 1656
-STRm 1657
-STTILECFG 1658
-STTILECFG_EVEX 1659
-STUI 1660
-ST_F 1661
-ST_FP 1662
-ST_FPrr 1663
-ST_Fp 1664
-ST_FpP 1665
-ST_Frr 1666
-SUB 1667
-SUBPDrm 1668
-SUBPDrr 1669
-SUBPSrm 1670
-SUBPSrr 1671
-SUBREG_TO_REG 1672
-SUBR_F 1673
-SUBR_FI 1674
-SUBR_FPrST 1675
-SUBR_FST 1676
-SUBR_Fp 1677
-SUBR_FpI 1678
-SUBR_FrST 1679
-SUBSDrm 1680
-SUBSDrm_Int 1681
-SUBSDrr 1682
-SUBSDrr_Int 1683
-SUBSSrm 1684
-SUBSSrm_Int 1685
-SUBSSrr 1686
-SUBSSrr_Int 1687
-SUB_F 1688
-SUB_FI 1689
-SUB_FPrST 1690
-SUB_FST 1691
-SUB_Fp 1692
-SUB_FpI 1693
-SUB_FrST 1694
-SWAPGS 1695
-SYSCALL 1696
-SYSENTER 1697
-SYSEXIT 1698
-SYSRET 1699
-T 1700
-TAILJMPd 1701
-TAILJMPd_CC 1702
-TAILJMPm 1703
-TAILJMPr 1704
-TCMMIMFP 1705
-TCMMRLFP 1706
-TCRETURN_HIPE 1707
-TCRETURN_WIN 1708
-TCRETURN_WINmi 1709
-TCRETURNdi 1710
-TCRETURNdicc 1711
-TCRETURNmi 1712
-TCRETURNri 1713
-TCVTROWD 1714
-TCVTROWPS 1715
-TDCALL 1716
-TDPBF 1717
-TDPBHF 1718
-TDPBSSD 1719
-TDPBSUD 1720
-TDPBUSD 1721
-TDPBUUD 1722
-TDPFP 1723
-TDPHBF 1724
-TDPHF 1725
-TEST 1726
-TESTUI 1727
-TILELOADD 1728
-TILELOADDRS 1729
-TILELOADDRST 1730
-TILELOADDRS_EVEX 1731
-TILELOADDT 1732
-TILELOADD_EVEX 1733
-TILEMOVROWrre 1734
-TILEMOVROWrri 1735
-TILERELEASE 1736
-TILESTORED 1737
-TILESTORED_EVEX 1738
-TILEZERO 1739
-TLBSYNC 1740
-TLSCall 1741
-TLS_addr 1742
-TLS_addrX 1743
-TLS_base_addr 1744
-TLS_base_addrX 1745
-TLS_desc 1746
-TMMULTF 1747
-TPAUSE 1748
-TRAP 1749
-TST_F 1750
-TST_Fp 1751
-TZCNT 1752
-TZMSK 1753
-UBSAN_UD 1754
-UCOMISDrm 1755
-UCOMISDrm_Int 1756
-UCOMISDrr 1757
-UCOMISDrr_Int 1758
-UCOMISSrm 1759
-UCOMISSrm_Int 1760
-UCOMISSrr 1761
-UCOMISSrr_Int 1762
-UCOM_FIPr 1763
-UCOM_FIr 1764
-UCOM_FPPr 1765
-UCOM_FPr 1766
-UCOM_FpIr 1767
-UCOM_Fpr 1768
-UCOM_Fr 1769
-UD 1770
-UIRET 1771
-UMONITOR 1772
-UMWAIT 1773
-UNPCKHPDrm 1774
-UNPCKHPDrr 1775
-UNPCKHPSrm 1776
-UNPCKHPSrr 1777
-UNPCKLPDrm 1778
-UNPCKLPDrr 1779
-UNPCKLPSrm 1780
-UNPCKLPSrr 1781
-URDMSRri 1782
-URDMSRri_EVEX 1783
-URDMSRrr 1784
-URDMSRrr_EVEX 1785
-UWRMSRir 1786
-UWRMSRir_EVEX 1787
-UWRMSRrr 1788
-UWRMSRrr_EVEX 1789
-V 1790
-VAARG 1791
-VAARG_X 1792
-VADDBF 1793
-VADDPDYrm 1794
-VADDPDYrr 1795
-VADDPDZ 1796
-VADDPDZrm 1797
-VADDPDZrmb 1798
-VADDPDZrmbk 1799
-VADDPDZrmbkz 1800
-VADDPDZrmk 1801
-VADDPDZrmkz 1802
-VADDPDZrr 1803
-VADDPDZrrb 1804
-VADDPDZrrbk 1805
-VADDPDZrrbkz 1806
-VADDPDZrrk 1807
-VADDPDZrrkz 1808
-VADDPDrm 1809
-VADDPDrr 1810
-VADDPHZ 1811
-VADDPHZrm 1812
-VADDPHZrmb 1813
-VADDPHZrmbk 1814
-VADDPHZrmbkz 1815
-VADDPHZrmk 1816
-VADDPHZrmkz 1817
-VADDPHZrr 1818
-VADDPHZrrb 1819
-VADDPHZrrbk 1820
-VADDPHZrrbkz 1821
-VADDPHZrrk 1822
-VADDPHZrrkz 1823
-VADDPSYrm 1824
-VADDPSYrr 1825
-VADDPSZ 1826
-VADDPSZrm 1827
-VADDPSZrmb 1828
-VADDPSZrmbk 1829
-VADDPSZrmbkz 1830
-VADDPSZrmk 1831
-VADDPSZrmkz 1832
-VADDPSZrr 1833
-VADDPSZrrb 1834
-VADDPSZrrbk 1835
-VADDPSZrrbkz 1836
-VADDPSZrrk 1837
-VADDPSZrrkz 1838
-VADDPSrm 1839
-VADDPSrr 1840
-VADDSDZrm 1841
-VADDSDZrm_Int 1842
-VADDSDZrmk_Int 1843
-VADDSDZrmkz_Int 1844
-VADDSDZrr 1845
-VADDSDZrr_Int 1846
-VADDSDZrrb_Int 1847
-VADDSDZrrbk_Int 1848
-VADDSDZrrbkz_Int 1849
-VADDSDZrrk_Int 1850
-VADDSDZrrkz_Int 1851
-VADDSDrm 1852
-VADDSDrm_Int 1853
-VADDSDrr 1854
-VADDSDrr_Int 1855
-VADDSHZrm 1856
-VADDSHZrm_Int 1857
-VADDSHZrmk_Int 1858
-VADDSHZrmkz_Int 1859
-VADDSHZrr 1860
-VADDSHZrr_Int 1861
-VADDSHZrrb_Int 1862
-VADDSHZrrbk_Int 1863
-VADDSHZrrbkz_Int 1864
-VADDSHZrrk_Int 1865
-VADDSHZrrkz_Int 1866
-VADDSSZrm 1867
-VADDSSZrm_Int 1868
-VADDSSZrmk_Int 1869
-VADDSSZrmkz_Int 1870
-VADDSSZrr 1871
-VADDSSZrr_Int 1872
-VADDSSZrrb_Int 1873
-VADDSSZrrbk_Int 1874
-VADDSSZrrbkz_Int 1875
-VADDSSZrrk_Int 1876
-VADDSSZrrkz_Int 1877
-VADDSSrm 1878
-VADDSSrm_Int 1879
-VADDSSrr 1880
-VADDSSrr_Int 1881
-VADDSUBPDYrm 1882
-VADDSUBPDYrr 1883
-VADDSUBPDrm 1884
-VADDSUBPDrr 1885
-VADDSUBPSYrm 1886
-VADDSUBPSYrr 1887
-VADDSUBPSrm 1888
-VADDSUBPSrr 1889
-VAESDECLASTYrm 1890
-VAESDECLASTYrr 1891
-VAESDECLASTZ 1892
-VAESDECLASTZrm 1893
-VAESDECLASTZrr 1894
-VAESDECLASTrm 1895
-VAESDECLASTrr 1896
-VAESDECYrm 1897
-VAESDECYrr 1898
-VAESDECZ 1899
-VAESDECZrm 1900
-VAESDECZrr 1901
-VAESDECrm 1902
-VAESDECrr 1903
-VAESENCLASTYrm 1904
-VAESENCLASTYrr 1905
-VAESENCLASTZ 1906
-VAESENCLASTZrm 1907
-VAESENCLASTZrr 1908
-VAESENCLASTrm 1909
-VAESENCLASTrr 1910
-VAESENCYrm 1911
-VAESENCYrr 1912
-VAESENCZ 1913
-VAESENCZrm 1914
-VAESENCZrr 1915
-VAESENCrm 1916
-VAESENCrr 1917
-VAESIMCrm 1918
-VAESIMCrr 1919
-VAESKEYGENASSISTrmi 1920
-VAESKEYGENASSISTrri 1921
-VALIGNDZ 1922
-VALIGNDZrmbi 1923
-VALIGNDZrmbik 1924
-VALIGNDZrmbikz 1925
-VALIGNDZrmi 1926
-VALIGNDZrmik 1927
-VALIGNDZrmikz 1928
-VALIGNDZrri 1929
-VALIGNDZrrik 1930
-VALIGNDZrrikz 1931
-VALIGNQZ 1932
-VALIGNQZrmbi 1933
-VALIGNQZrmbik 1934
-VALIGNQZrmbikz 1935
-VALIGNQZrmi 1936
-VALIGNQZrmik 1937
-VALIGNQZrmikz 1938
-VALIGNQZrri 1939
-VALIGNQZrrik 1940
-VALIGNQZrrikz 1941
-VANDNPDYrm 1942
-VANDNPDYrr 1943
-VANDNPDZ 1944
-VANDNPDZrm 1945
-VANDNPDZrmb 1946
-VANDNPDZrmbk 1947
-VANDNPDZrmbkz 1948
-VANDNPDZrmk 1949
-VANDNPDZrmkz 1950
-VANDNPDZrr 1951
-VANDNPDZrrk 1952
-VANDNPDZrrkz 1953
-VANDNPDrm 1954
-VANDNPDrr 1955
-VANDNPSYrm 1956
-VANDNPSYrr 1957
-VANDNPSZ 1958
-VANDNPSZrm 1959
-VANDNPSZrmb 1960
-VANDNPSZrmbk 1961
-VANDNPSZrmbkz 1962
-VANDNPSZrmk 1963
-VANDNPSZrmkz 1964
-VANDNPSZrr 1965
-VANDNPSZrrk 1966
-VANDNPSZrrkz 1967
-VANDNPSrm 1968
-VANDNPSrr 1969
-VANDPDYrm 1970
-VANDPDYrr 1971
-VANDPDZ 1972
-VANDPDZrm 1973
-VANDPDZrmb 1974
-VANDPDZrmbk 1975
-VANDPDZrmbkz 1976
-VANDPDZrmk 1977
-VANDPDZrmkz 1978
-VANDPDZrr 1979
-VANDPDZrrk 1980
-VANDPDZrrkz 1981
-VANDPDrm 1982
-VANDPDrr 1983
-VANDPSYrm 1984
-VANDPSYrr 1985
-VANDPSZ 1986
-VANDPSZrm 1987
-VANDPSZrmb 1988
-VANDPSZrmbk 1989
-VANDPSZrmbkz 1990
-VANDPSZrmk 1991
-VANDPSZrmkz 1992
-VANDPSZrr 1993
-VANDPSZrrk 1994
-VANDPSZrrkz 1995
-VANDPSrm 1996
-VANDPSrr 1997
-VASTART_SAVE_XMM_REGS 1998
-VBCSTNEBF 1999
-VBCSTNESH 2000
-VBLENDMPDZ 2001
-VBLENDMPDZrm 2002
-VBLENDMPDZrmb 2003
-VBLENDMPDZrmbk 2004
-VBLENDMPDZrmbkz 2005
-VBLENDMPDZrmk 2006
-VBLENDMPDZrmkz 2007
-VBLENDMPDZrr 2008
-VBLENDMPDZrrk 2009
-VBLENDMPDZrrkz 2010
-VBLENDMPSZ 2011
-VBLENDMPSZrm 2012
-VBLENDMPSZrmb 2013
-VBLENDMPSZrmbk 2014
-VBLENDMPSZrmbkz 2015
-VBLENDMPSZrmk 2016
-VBLENDMPSZrmkz 2017
-VBLENDMPSZrr 2018
-VBLENDMPSZrrk 2019
-VBLENDMPSZrrkz 2020
-VBLENDPDYrmi 2021
-VBLENDPDYrri 2022
-VBLENDPDrmi 2023
-VBLENDPDrri 2024
-VBLENDPSYrmi 2025
-VBLENDPSYrri 2026
-VBLENDPSrmi 2027
-VBLENDPSrri 2028
-VBLENDVPDYrmr 2029
-VBLENDVPDYrrr 2030
-VBLENDVPDrmr 2031
-VBLENDVPDrrr 2032
-VBLENDVPSYrmr 2033
-VBLENDVPSYrrr 2034
-VBLENDVPSrmr 2035
-VBLENDVPSrrr 2036
-VBROADCASTF 2037
-VBROADCASTI 2038
-VBROADCASTSDYrm 2039
-VBROADCASTSDYrr 2040
-VBROADCASTSDZ 2041
-VBROADCASTSDZrm 2042
-VBROADCASTSDZrmk 2043
-VBROADCASTSDZrmkz 2044
-VBROADCASTSDZrr 2045
-VBROADCASTSDZrrk 2046
-VBROADCASTSDZrrkz 2047
-VBROADCASTSSYrm 2048
-VBROADCASTSSYrr 2049
-VBROADCASTSSZ 2050
-VBROADCASTSSZrm 2051
-VBROADCASTSSZrmk 2052
-VBROADCASTSSZrmkz 2053
-VBROADCASTSSZrr 2054
-VBROADCASTSSZrrk 2055
-VBROADCASTSSZrrkz 2056
-VBROADCASTSSrm 2057
-VBROADCASTSSrr 2058
-VCMPBF 2059
-VCMPPDYrmi 2060
-VCMPPDYrri 2061
-VCMPPDZ 2062
-VCMPPDZrmbi 2063
-VCMPPDZrmbik 2064
-VCMPPDZrmi 2065
-VCMPPDZrmik 2066
-VCMPPDZrri 2067
-VCMPPDZrrib 2068
-VCMPPDZrribk 2069
-VCMPPDZrrik 2070
-VCMPPDrmi 2071
-VCMPPDrri 2072
-VCMPPHZ 2073
-VCMPPHZrmbi 2074
-VCMPPHZrmbik 2075
-VCMPPHZrmi 2076
-VCMPPHZrmik 2077
-VCMPPHZrri 2078
-VCMPPHZrrib 2079
-VCMPPHZrribk 2080
-VCMPPHZrrik 2081
-VCMPPSYrmi 2082
-VCMPPSYrri 2083
-VCMPPSZ 2084
-VCMPPSZrmbi 2085
-VCMPPSZrmbik 2086
-VCMPPSZrmi 2087
-VCMPPSZrmik 2088
-VCMPPSZrri 2089
-VCMPPSZrrib 2090
-VCMPPSZrribk 2091
-VCMPPSZrrik 2092
-VCMPPSrmi 2093
-VCMPPSrri 2094
-VCMPSDZrmi 2095
-VCMPSDZrmi_Int 2096
-VCMPSDZrmik_Int 2097
-VCMPSDZrri 2098
-VCMPSDZrri_Int 2099
-VCMPSDZrrib_Int 2100
-VCMPSDZrribk_Int 2101
-VCMPSDZrrik_Int 2102
-VCMPSDrmi 2103
-VCMPSDrmi_Int 2104
-VCMPSDrri 2105
-VCMPSDrri_Int 2106
-VCMPSHZrmi 2107
-VCMPSHZrmi_Int 2108
-VCMPSHZrmik_Int 2109
-VCMPSHZrri 2110
-VCMPSHZrri_Int 2111
-VCMPSHZrrib_Int 2112
-VCMPSHZrribk_Int 2113
-VCMPSHZrrik_Int 2114
-VCMPSSZrmi 2115
-VCMPSSZrmi_Int 2116
-VCMPSSZrmik_Int 2117
-VCMPSSZrri 2118
-VCMPSSZrri_Int 2119
-VCMPSSZrrib_Int 2120
-VCMPSSZrribk_Int 2121
-VCMPSSZrrik_Int 2122
-VCMPSSrmi 2123
-VCMPSSrmi_Int 2124
-VCMPSSrri 2125
-VCMPSSrri_Int 2126
-VCOMISBF 2127
-VCOMISDZrm 2128
-VCOMISDZrm_Int 2129
-VCOMISDZrr 2130
-VCOMISDZrr_Int 2131
-VCOMISDZrrb 2132
-VCOMISDrm 2133
-VCOMISDrm_Int 2134
-VCOMISDrr 2135
-VCOMISDrr_Int 2136
-VCOMISHZrm 2137
-VCOMISHZrm_Int 2138
-VCOMISHZrr 2139
-VCOMISHZrr_Int 2140
-VCOMISHZrrb 2141
-VCOMISSZrm 2142
-VCOMISSZrm_Int 2143
-VCOMISSZrr 2144
-VCOMISSZrr_Int 2145
-VCOMISSZrrb 2146
-VCOMISSrm 2147
-VCOMISSrm_Int 2148
-VCOMISSrr 2149
-VCOMISSrr_Int 2150
-VCOMPRESSPDZ 2151
-VCOMPRESSPDZmr 2152
-VCOMPRESSPDZmrk 2153
-VCOMPRESSPDZrr 2154
-VCOMPRESSPDZrrk 2155
-VCOMPRESSPDZrrkz 2156
-VCOMPRESSPSZ 2157
-VCOMPRESSPSZmr 2158
-VCOMPRESSPSZmrk 2159
-VCOMPRESSPSZrr 2160
-VCOMPRESSPSZrrk 2161
-VCOMPRESSPSZrrkz 2162
-VCOMXSDZrm_Int 2163
-VCOMXSDZrr_Int 2164
-VCOMXSDZrrb_Int 2165
-VCOMXSHZrm_Int 2166
-VCOMXSHZrr_Int 2167
-VCOMXSHZrrb_Int 2168
-VCOMXSSZrm_Int 2169
-VCOMXSSZrr_Int 2170
-VCOMXSSZrrb_Int 2171
-VCVT 2172
-VCVTBF 2173
-VCVTBIASPH 2174
-VCVTDQ 2175
-VCVTHF 2176
-VCVTNE 2177
-VCVTNEEBF 2178
-VCVTNEEPH 2179
-VCVTNEOBF 2180
-VCVTNEOPH 2181
-VCVTNEPS 2182
-VCVTPD 2183
-VCVTPH 2184
-VCVTPS 2185
-VCVTQQ 2186
-VCVTSD 2187
-VCVTSH 2188
-VCVTSI 2189
-VCVTSS 2190
-VCVTTBF 2191
-VCVTTPD 2192
-VCVTTPH 2193
-VCVTTPS 2194
-VCVTTSD 2195
-VCVTTSH 2196
-VCVTTSS 2197
-VCVTUDQ 2198
-VCVTUQQ 2199
-VCVTUSI 2200
-VCVTUW 2201
-VCVTW 2202
-VDBPSADBWZ 2203
-VDBPSADBWZrmi 2204
-VDBPSADBWZrmik 2205
-VDBPSADBWZrmikz 2206
-VDBPSADBWZrri 2207
-VDBPSADBWZrrik 2208
-VDBPSADBWZrrikz 2209
-VDIVBF 2210
-VDIVPDYrm 2211
-VDIVPDYrr 2212
-VDIVPDZ 2213
-VDIVPDZrm 2214
-VDIVPDZrmb 2215
-VDIVPDZrmbk 2216
-VDIVPDZrmbkz 2217
-VDIVPDZrmk 2218
-VDIVPDZrmkz 2219
-VDIVPDZrr 2220
-VDIVPDZrrb 2221
-VDIVPDZrrbk 2222
-VDIVPDZrrbkz 2223
-VDIVPDZrrk 2224
-VDIVPDZrrkz 2225
-VDIVPDrm 2226
-VDIVPDrr 2227
-VDIVPHZ 2228
-VDIVPHZrm 2229
-VDIVPHZrmb 2230
-VDIVPHZrmbk 2231
-VDIVPHZrmbkz 2232
-VDIVPHZrmk 2233
-VDIVPHZrmkz 2234
-VDIVPHZrr 2235
-VDIVPHZrrb 2236
-VDIVPHZrrbk 2237
-VDIVPHZrrbkz 2238
-VDIVPHZrrk 2239
-VDIVPHZrrkz 2240
-VDIVPSYrm 2241
-VDIVPSYrr 2242
-VDIVPSZ 2243
-VDIVPSZrm 2244
-VDIVPSZrmb 2245
-VDIVPSZrmbk 2246
-VDIVPSZrmbkz 2247
-VDIVPSZrmk 2248
-VDIVPSZrmkz 2249
-VDIVPSZrr 2250
-VDIVPSZrrb 2251
-VDIVPSZrrbk 2252
-VDIVPSZrrbkz 2253
-VDIVPSZrrk 2254
-VDIVPSZrrkz 2255
-VDIVPSrm 2256
-VDIVPSrr 2257
-VDIVSDZrm 2258
-VDIVSDZrm_Int 2259
-VDIVSDZrmk_Int 2260
-VDIVSDZrmkz_Int 2261
-VDIVSDZrr 2262
-VDIVSDZrr_Int 2263
-VDIVSDZrrb_Int 2264
-VDIVSDZrrbk_Int 2265
-VDIVSDZrrbkz_Int 2266
-VDIVSDZrrk_Int 2267
-VDIVSDZrrkz_Int 2268
-VDIVSDrm 2269
-VDIVSDrm_Int 2270
-VDIVSDrr 2271
-VDIVSDrr_Int 2272
-VDIVSHZrm 2273
-VDIVSHZrm_Int 2274
-VDIVSHZrmk_Int 2275
-VDIVSHZrmkz_Int 2276
-VDIVSHZrr 2277
-VDIVSHZrr_Int 2278
-VDIVSHZrrb_Int 2279
-VDIVSHZrrbk_Int 2280
-VDIVSHZrrbkz_Int 2281
-VDIVSHZrrk_Int 2282
-VDIVSHZrrkz_Int 2283
-VDIVSSZrm 2284
-VDIVSSZrm_Int 2285
-VDIVSSZrmk_Int 2286
-VDIVSSZrmkz_Int 2287
-VDIVSSZrr 2288
-VDIVSSZrr_Int 2289
-VDIVSSZrrb_Int 2290
-VDIVSSZrrbk_Int 2291
-VDIVSSZrrbkz_Int 2292
-VDIVSSZrrk_Int 2293
-VDIVSSZrrkz_Int 2294
-VDIVSSrm 2295
-VDIVSSrm_Int 2296
-VDIVSSrr 2297
-VDIVSSrr_Int 2298
-VDPBF 2299
-VDPPDrmi 2300
-VDPPDrri 2301
-VDPPHPSZ 2302
-VDPPHPSZm 2303
-VDPPHPSZmb 2304
-VDPPHPSZmbk 2305
-VDPPHPSZmbkz 2306
-VDPPHPSZmk 2307
-VDPPHPSZmkz 2308
-VDPPHPSZr 2309
-VDPPHPSZrk 2310
-VDPPHPSZrkz 2311
-VDPPSYrmi 2312
-VDPPSYrri 2313
-VDPPSrmi 2314
-VDPPSrri 2315
-VERRm 2316
-VERRr 2317
-VERWm 2318
-VERWr 2319
-VEXP 2320
-VEXPANDPDZ 2321
-VEXPANDPDZrm 2322
-VEXPANDPDZrmk 2323
-VEXPANDPDZrmkz 2324
-VEXPANDPDZrr 2325
-VEXPANDPDZrrk 2326
-VEXPANDPDZrrkz 2327
-VEXPANDPSZ 2328
-VEXPANDPSZrm 2329
-VEXPANDPSZrmk 2330
-VEXPANDPSZrmkz 2331
-VEXPANDPSZrr 2332
-VEXPANDPSZrrk 2333
-VEXPANDPSZrrkz 2334
-VEXTRACTF 2335
-VEXTRACTI 2336
-VEXTRACTPSZmri 2337
-VEXTRACTPSZrri 2338
-VEXTRACTPSmri 2339
-VEXTRACTPSrri 2340
-VFCMADDCPHZ 2341
-VFCMADDCPHZm 2342
-VFCMADDCPHZmb 2343
-VFCMADDCPHZmbk 2344
-VFCMADDCPHZmbkz 2345
-VFCMADDCPHZmk 2346
-VFCMADDCPHZmkz 2347
-VFCMADDCPHZr 2348
-VFCMADDCPHZrb 2349
-VFCMADDCPHZrbk 2350
-VFCMADDCPHZrbkz 2351
-VFCMADDCPHZrk 2352
-VFCMADDCPHZrkz 2353
-VFCMADDCSHZm 2354
-VFCMADDCSHZmk 2355
-VFCMADDCSHZmkz 2356
-VFCMADDCSHZr 2357
-VFCMADDCSHZrb 2358
-VFCMADDCSHZrbk 2359
-VFCMADDCSHZrbkz 2360
-VFCMADDCSHZrk 2361
-VFCMADDCSHZrkz 2362
-VFCMULCPHZ 2363
-VFCMULCPHZrm 2364
-VFCMULCPHZrmb 2365
-VFCMULCPHZrmbk 2366
-VFCMULCPHZrmbkz 2367
-VFCMULCPHZrmk 2368
-VFCMULCPHZrmkz 2369
-VFCMULCPHZrr 2370
-VFCMULCPHZrrb 2371
-VFCMULCPHZrrbk 2372
-VFCMULCPHZrrbkz 2373
-VFCMULCPHZrrk 2374
-VFCMULCPHZrrkz 2375
-VFCMULCSHZrm 2376
-VFCMULCSHZrmk 2377
-VFCMULCSHZrmkz 2378
-VFCMULCSHZrr 2379
-VFCMULCSHZrrb 2380
-VFCMULCSHZrrbk 2381
-VFCMULCSHZrrbkz 2382
-VFCMULCSHZrrk 2383
-VFCMULCSHZrrkz 2384
-VFIXUPIMMPDZ 2385
-VFIXUPIMMPDZrmbi 2386
-VFIXUPIMMPDZrmbik 2387
-VFIXUPIMMPDZrmbikz 2388
-VFIXUPIMMPDZrmi 2389
-VFIXUPIMMPDZrmik 2390
-VFIXUPIMMPDZrmikz 2391
-VFIXUPIMMPDZrri 2392
-VFIXUPIMMPDZrrib 2393
-VFIXUPIMMPDZrribk 2394
-VFIXUPIMMPDZrribkz 2395
-VFIXUPIMMPDZrrik 2396
-VFIXUPIMMPDZrrikz 2397
-VFIXUPIMMPSZ 2398
-VFIXUPIMMPSZrmbi 2399
-VFIXUPIMMPSZrmbik 2400
-VFIXUPIMMPSZrmbikz 2401
-VFIXUPIMMPSZrmi 2402
-VFIXUPIMMPSZrmik 2403
-VFIXUPIMMPSZrmikz 2404
-VFIXUPIMMPSZrri 2405
-VFIXUPIMMPSZrrib 2406
-VFIXUPIMMPSZrribk 2407
-VFIXUPIMMPSZrribkz 2408
-VFIXUPIMMPSZrrik 2409
-VFIXUPIMMPSZrrikz 2410
-VFIXUPIMMSDZrmi 2411
-VFIXUPIMMSDZrmik 2412
-VFIXUPIMMSDZrmikz 2413
-VFIXUPIMMSDZrri 2414
-VFIXUPIMMSDZrrib 2415
-VFIXUPIMMSDZrribk 2416
-VFIXUPIMMSDZrribkz 2417
-VFIXUPIMMSDZrrik 2418
-VFIXUPIMMSDZrrikz 2419
-VFIXUPIMMSSZrmi 2420
-VFIXUPIMMSSZrmik 2421
-VFIXUPIMMSSZrmikz 2422
-VFIXUPIMMSSZrri 2423
-VFIXUPIMMSSZrrib 2424
-VFIXUPIMMSSZrribk 2425
-VFIXUPIMMSSZrribkz 2426
-VFIXUPIMMSSZrrik 2427
-VFIXUPIMMSSZrrikz 2428
-VFMADD 2429
-VFMADDCPHZ 2430
-VFMADDCPHZm 2431
-VFMADDCPHZmb 2432
-VFMADDCPHZmbk 2433
-VFMADDCPHZmbkz 2434
-VFMADDCPHZmk 2435
-VFMADDCPHZmkz 2436
-VFMADDCPHZr 2437
-VFMADDCPHZrb 2438
-VFMADDCPHZrbk 2439
-VFMADDCPHZrbkz 2440
-VFMADDCPHZrk 2441
-VFMADDCPHZrkz 2442
-VFMADDCSHZm 2443
-VFMADDCSHZmk 2444
-VFMADDCSHZmkz 2445
-VFMADDCSHZr 2446
-VFMADDCSHZrb 2447
-VFMADDCSHZrbk 2448
-VFMADDCSHZrbkz 2449
-VFMADDCSHZrk 2450
-VFMADDCSHZrkz 2451
-VFMADDPD 2452
-VFMADDPS 2453
-VFMADDSD 2454
-VFMADDSS 2455
-VFMADDSUB 2456
-VFMADDSUBPD 2457
-VFMADDSUBPS 2458
-VFMSUB 2459
-VFMSUBADD 2460
-VFMSUBADDPD 2461
-VFMSUBADDPS 2462
-VFMSUBPD 2463
-VFMSUBPS 2464
-VFMSUBSD 2465
-VFMSUBSS 2466
-VFMULCPHZ 2467
-VFMULCPHZrm 2468
-VFMULCPHZrmb 2469
-VFMULCPHZrmbk 2470
-VFMULCPHZrmbkz 2471
-VFMULCPHZrmk 2472
-VFMULCPHZrmkz 2473
-VFMULCPHZrr 2474
-VFMULCPHZrrb 2475
-VFMULCPHZrrbk 2476
-VFMULCPHZrrbkz 2477
-VFMULCPHZrrk 2478
-VFMULCPHZrrkz 2479
-VFMULCSHZrm 2480
-VFMULCSHZrmk 2481
-VFMULCSHZrmkz 2482
-VFMULCSHZrr 2483
-VFMULCSHZrrb 2484
-VFMULCSHZrrbk 2485
-VFMULCSHZrrbkz 2486
-VFMULCSHZrrk 2487
-VFMULCSHZrrkz 2488
-VFNMADD 2489
-VFNMADDPD 2490
-VFNMADDPS 2491
-VFNMADDSD 2492
-VFNMADDSS 2493
-VFNMSUB 2494
-VFNMSUBPD 2495
-VFNMSUBPS 2496
-VFNMSUBSD 2497
-VFNMSUBSS 2498
-VFPCLASSBF 2499
-VFPCLASSPDZ 2500
-VFPCLASSPDZmbi 2501
-VFPCLASSPDZmbik 2502
-VFPCLASSPDZmi 2503
-VFPCLASSPDZmik 2504
-VFPCLASSPDZri 2505
-VFPCLASSPDZrik 2506
-VFPCLASSPHZ 2507
-VFPCLASSPHZmbi 2508
-VFPCLASSPHZmbik 2509
-VFPCLASSPHZmi 2510
-VFPCLASSPHZmik 2511
-VFPCLASSPHZri 2512
-VFPCLASSPHZrik 2513
-VFPCLASSPSZ 2514
-VFPCLASSPSZmbi 2515
-VFPCLASSPSZmbik 2516
-VFPCLASSPSZmi 2517
-VFPCLASSPSZmik 2518
-VFPCLASSPSZri 2519
-VFPCLASSPSZrik 2520
-VFPCLASSSDZmi 2521
-VFPCLASSSDZmik 2522
-VFPCLASSSDZri 2523
-VFPCLASSSDZrik 2524
-VFPCLASSSHZmi 2525
-VFPCLASSSHZmik 2526
-VFPCLASSSHZri 2527
-VFPCLASSSHZrik 2528
-VFPCLASSSSZmi 2529
-VFPCLASSSSZmik 2530
-VFPCLASSSSZri 2531
-VFPCLASSSSZrik 2532
-VFRCZPDYrm 2533
-VFRCZPDYrr 2534
-VFRCZPDrm 2535
-VFRCZPDrr 2536
-VFRCZPSYrm 2537
-VFRCZPSYrr 2538
-VFRCZPSrm 2539
-VFRCZPSrr 2540
-VFRCZSDrm 2541
-VFRCZSDrr 2542
-VFRCZSSrm 2543
-VFRCZSSrr 2544
-VGATHERDPDYrm 2545
-VGATHERDPDZ 2546
-VGATHERDPDZrm 2547
-VGATHERDPDrm 2548
-VGATHERDPSYrm 2549
-VGATHERDPSZ 2550
-VGATHERDPSZrm 2551
-VGATHERDPSrm 2552
-VGATHERPF 2553
-VGATHERQPDYrm 2554
-VGATHERQPDZ 2555
-VGATHERQPDZrm 2556
-VGATHERQPDrm 2557
-VGATHERQPSYrm 2558
-VGATHERQPSZ 2559
-VGATHERQPSZrm 2560
-VGATHERQPSrm 2561
-VGETEXPBF 2562
-VGETEXPPDZ 2563
-VGETEXPPDZm 2564
-VGETEXPPDZmb 2565
-VGETEXPPDZmbk 2566
-VGETEXPPDZmbkz 2567
-VGETEXPPDZmk 2568
-VGETEXPPDZmkz 2569
-VGETEXPPDZr 2570
-VGETEXPPDZrb 2571
-VGETEXPPDZrbk 2572
-VGETEXPPDZrbkz 2573
-VGETEXPPDZrk 2574
-VGETEXPPDZrkz 2575
-VGETEXPPHZ 2576
-VGETEXPPHZm 2577
-VGETEXPPHZmb 2578
-VGETEXPPHZmbk 2579
-VGETEXPPHZmbkz 2580
-VGETEXPPHZmk 2581
-VGETEXPPHZmkz 2582
-VGETEXPPHZr 2583
-VGETEXPPHZrb 2584
-VGETEXPPHZrbk 2585
-VGETEXPPHZrbkz 2586
-VGETEXPPHZrk 2587
-VGETEXPPHZrkz 2588
-VGETEXPPSZ 2589
-VGETEXPPSZm 2590
-VGETEXPPSZmb 2591
-VGETEXPPSZmbk 2592
-VGETEXPPSZmbkz 2593
-VGETEXPPSZmk 2594
-VGETEXPPSZmkz 2595
-VGETEXPPSZr 2596
-VGETEXPPSZrb 2597
-VGETEXPPSZrbk 2598
-VGETEXPPSZrbkz 2599
-VGETEXPPSZrk 2600
-VGETEXPPSZrkz 2601
-VGETEXPSDZm 2602
-VGETEXPSDZmk 2603
-VGETEXPSDZmkz 2604
-VGETEXPSDZr 2605
-VGETEXPSDZrb 2606
-VGETEXPSDZrbk 2607
-VGETEXPSDZrbkz 2608
-VGETEXPSDZrk 2609
-VGETEXPSDZrkz 2610
-VGETEXPSHZm 2611
-VGETEXPSHZmk 2612
-VGETEXPSHZmkz 2613
-VGETEXPSHZr 2614
-VGETEXPSHZrb 2615
-VGETEXPSHZrbk 2616
-VGETEXPSHZrbkz 2617
-VGETEXPSHZrk 2618
-VGETEXPSHZrkz 2619
-VGETEXPSSZm 2620
-VGETEXPSSZmk 2621
-VGETEXPSSZmkz 2622
-VGETEXPSSZr 2623
-VGETEXPSSZrb 2624
-VGETEXPSSZrbk 2625
-VGETEXPSSZrbkz 2626
-VGETEXPSSZrk 2627
-VGETEXPSSZrkz 2628
-VGETMANTBF 2629
-VGETMANTPDZ 2630
-VGETMANTPDZrmbi 2631
-VGETMANTPDZrmbik 2632
-VGETMANTPDZrmbikz 2633
-VGETMANTPDZrmi 2634
-VGETMANTPDZrmik 2635
-VGETMANTPDZrmikz 2636
-VGETMANTPDZrri 2637
-VGETMANTPDZrrib 2638
-VGETMANTPDZrribk 2639
-VGETMANTPDZrribkz 2640
-VGETMANTPDZrrik 2641
-VGETMANTPDZrrikz 2642
-VGETMANTPHZ 2643
-VGETMANTPHZrmbi 2644
-VGETMANTPHZrmbik 2645
-VGETMANTPHZrmbikz 2646
-VGETMANTPHZrmi 2647
-VGETMANTPHZrmik 2648
-VGETMANTPHZrmikz 2649
-VGETMANTPHZrri 2650
-VGETMANTPHZrrib 2651
-VGETMANTPHZrribk 2652
-VGETMANTPHZrribkz 2653
-VGETMANTPHZrrik 2654
-VGETMANTPHZrrikz 2655
-VGETMANTPSZ 2656
-VGETMANTPSZrmbi 2657
-VGETMANTPSZrmbik 2658
-VGETMANTPSZrmbikz 2659
-VGETMANTPSZrmi 2660
-VGETMANTPSZrmik 2661
-VGETMANTPSZrmikz 2662
-VGETMANTPSZrri 2663
-VGETMANTPSZrrib 2664
-VGETMANTPSZrribk 2665
-VGETMANTPSZrribkz 2666
-VGETMANTPSZrrik 2667
-VGETMANTPSZrrikz 2668
-VGETMANTSDZrmi 2669
-VGETMANTSDZrmik 2670
-VGETMANTSDZrmikz 2671
-VGETMANTSDZrri 2672
-VGETMANTSDZrrib 2673
-VGETMANTSDZrribk 2674
-VGETMANTSDZrribkz 2675
-VGETMANTSDZrrik 2676
-VGETMANTSDZrrikz 2677
-VGETMANTSHZrmi 2678
-VGETMANTSHZrmik 2679
-VGETMANTSHZrmikz 2680
-VGETMANTSHZrri 2681
-VGETMANTSHZrrib 2682
-VGETMANTSHZrribk 2683
-VGETMANTSHZrribkz 2684
-VGETMANTSHZrrik 2685
-VGETMANTSHZrrikz 2686
-VGETMANTSSZrmi 2687
-VGETMANTSSZrmik 2688
-VGETMANTSSZrmikz 2689
-VGETMANTSSZrri 2690
-VGETMANTSSZrrib 2691
-VGETMANTSSZrribk 2692
-VGETMANTSSZrribkz 2693
-VGETMANTSSZrrik 2694
-VGETMANTSSZrrikz 2695
-VGF 2696
-VHADDPDYrm 2697
-VHADDPDYrr 2698
-VHADDPDrm 2699
-VHADDPDrr 2700
-VHADDPSYrm 2701
-VHADDPSYrr 2702
-VHADDPSrm 2703
-VHADDPSrr 2704
-VHSUBPDYrm 2705
-VHSUBPDYrr 2706
-VHSUBPDrm 2707
-VHSUBPDrr 2708
-VHSUBPSYrm 2709
-VHSUBPSYrr 2710
-VHSUBPSrm 2711
-VHSUBPSrr 2712
-VINSERTF 2713
-VINSERTI 2714
-VINSERTPSZrmi 2715
-VINSERTPSZrri 2716
-VINSERTPSrmi 2717
-VINSERTPSrri 2718
-VLDDQUYrm 2719
-VLDDQUrm 2720
-VLDMXCSR 2721
-VMASKMOVDQU 2722
-VMASKMOVPDYmr 2723
-VMASKMOVPDYrm 2724
-VMASKMOVPDmr 2725
-VMASKMOVPDrm 2726
-VMASKMOVPSYmr 2727
-VMASKMOVPSYrm 2728
-VMASKMOVPSmr 2729
-VMASKMOVPSrm 2730
-VMAXBF 2731
-VMAXCPDYrm 2732
-VMAXCPDYrr 2733
-VMAXCPDZ 2734
-VMAXCPDZrm 2735
-VMAXCPDZrmb 2736
-VMAXCPDZrmbk 2737
-VMAXCPDZrmbkz 2738
-VMAXCPDZrmk 2739
-VMAXCPDZrmkz 2740
-VMAXCPDZrr 2741
-VMAXCPDZrrk 2742
-VMAXCPDZrrkz 2743
-VMAXCPDrm 2744
-VMAXCPDrr 2745
-VMAXCPHZ 2746
-VMAXCPHZrm 2747
-VMAXCPHZrmb 2748
-VMAXCPHZrmbk 2749
-VMAXCPHZrmbkz 2750
-VMAXCPHZrmk 2751
-VMAXCPHZrmkz 2752
-VMAXCPHZrr 2753
-VMAXCPHZrrk 2754
-VMAXCPHZrrkz 2755
-VMAXCPSYrm 2756
-VMAXCPSYrr 2757
-VMAXCPSZ 2758
-VMAXCPSZrm 2759
-VMAXCPSZrmb 2760
-VMAXCPSZrmbk 2761
-VMAXCPSZrmbkz 2762
-VMAXCPSZrmk 2763
-VMAXCPSZrmkz 2764
-VMAXCPSZrr 2765
-VMAXCPSZrrk 2766
-VMAXCPSZrrkz 2767
-VMAXCPSrm 2768
-VMAXCPSrr 2769
-VMAXCSDZrm 2770
-VMAXCSDZrr 2771
-VMAXCSDrm 2772
-VMAXCSDrr 2773
-VMAXCSHZrm 2774
-VMAXCSHZrr 2775
-VMAXCSSZrm 2776
-VMAXCSSZrr 2777
-VMAXCSSrm 2778
-VMAXCSSrr 2779
-VMAXPDYrm 2780
-VMAXPDYrr 2781
-VMAXPDZ 2782
-VMAXPDZrm 2783
-VMAXPDZrmb 2784
-VMAXPDZrmbk 2785
-VMAXPDZrmbkz 2786
-VMAXPDZrmk 2787
-VMAXPDZrmkz 2788
-VMAXPDZrr 2789
-VMAXPDZrrb 2790
-VMAXPDZrrbk 2791
-VMAXPDZrrbkz 2792
-VMAXPDZrrk 2793
-VMAXPDZrrkz 2794
-VMAXPDrm 2795
-VMAXPDrr 2796
-VMAXPHZ 2797
-VMAXPHZrm 2798
-VMAXPHZrmb 2799
-VMAXPHZrmbk 2800
-VMAXPHZrmbkz 2801
-VMAXPHZrmk 2802
-VMAXPHZrmkz 2803
-VMAXPHZrr 2804
-VMAXPHZrrb 2805
-VMAXPHZrrbk 2806
-VMAXPHZrrbkz 2807
-VMAXPHZrrk 2808
-VMAXPHZrrkz 2809
-VMAXPSYrm 2810
-VMAXPSYrr 2811
-VMAXPSZ 2812
-VMAXPSZrm 2813
-VMAXPSZrmb 2814
-VMAXPSZrmbk 2815
-VMAXPSZrmbkz 2816
-VMAXPSZrmk 2817
-VMAXPSZrmkz 2818
-VMAXPSZrr 2819
-VMAXPSZrrb 2820
-VMAXPSZrrbk 2821
-VMAXPSZrrbkz 2822
-VMAXPSZrrk 2823
-VMAXPSZrrkz 2824
-VMAXPSrm 2825
-VMAXPSrr 2826
-VMAXSDZrm 2827
-VMAXSDZrm_Int 2828
-VMAXSDZrmk_Int 2829
-VMAXSDZrmkz_Int 2830
-VMAXSDZrr 2831
-VMAXSDZrr_Int 2832
-VMAXSDZrrb_Int 2833
-VMAXSDZrrbk_Int 2834
-VMAXSDZrrbkz_Int 2835
-VMAXSDZrrk_Int 2836
-VMAXSDZrrkz_Int 2837
-VMAXSDrm 2838
-VMAXSDrm_Int 2839
-VMAXSDrr 2840
-VMAXSDrr_Int 2841
-VMAXSHZrm 2842
-VMAXSHZrm_Int 2843
-VMAXSHZrmk_Int 2844
-VMAXSHZrmkz_Int 2845
-VMAXSHZrr 2846
-VMAXSHZrr_Int 2847
-VMAXSHZrrb_Int 2848
-VMAXSHZrrbk_Int 2849
-VMAXSHZrrbkz_Int 2850
-VMAXSHZrrk_Int 2851
-VMAXSHZrrkz_Int 2852
-VMAXSSZrm 2853
-VMAXSSZrm_Int 2854
-VMAXSSZrmk_Int 2855
-VMAXSSZrmkz_Int 2856
-VMAXSSZrr 2857
-VMAXSSZrr_Int 2858
-VMAXSSZrrb_Int 2859
-VMAXSSZrrbk_Int 2860
-VMAXSSZrrbkz_Int 2861
-VMAXSSZrrk_Int 2862
-VMAXSSZrrkz_Int 2863
-VMAXSSrm 2864
-VMAXSSrm_Int 2865
-VMAXSSrr 2866
-VMAXSSrr_Int 2867
-VMCALL 2868
-VMCLEARm 2869
-VMFUNC 2870
-VMINBF 2871
-VMINCPDYrm 2872
-VMINCPDYrr 2873
-VMINCPDZ 2874
-VMINCPDZrm 2875
-VMINCPDZrmb 2876
-VMINCPDZrmbk 2877
-VMINCPDZrmbkz 2878
-VMINCPDZrmk 2879
-VMINCPDZrmkz 2880
-VMINCPDZrr 2881
-VMINCPDZrrk 2882
-VMINCPDZrrkz 2883
-VMINCPDrm 2884
-VMINCPDrr 2885
-VMINCPHZ 2886
-VMINCPHZrm 2887
-VMINCPHZrmb 2888
-VMINCPHZrmbk 2889
-VMINCPHZrmbkz 2890
-VMINCPHZrmk 2891
-VMINCPHZrmkz 2892
-VMINCPHZrr 2893
-VMINCPHZrrk 2894
-VMINCPHZrrkz 2895
-VMINCPSYrm 2896
-VMINCPSYrr 2897
-VMINCPSZ 2898
-VMINCPSZrm 2899
-VMINCPSZrmb 2900
-VMINCPSZrmbk 2901
-VMINCPSZrmbkz 2902
-VMINCPSZrmk 2903
-VMINCPSZrmkz 2904
-VMINCPSZrr 2905
-VMINCPSZrrk 2906
-VMINCPSZrrkz 2907
-VMINCPSrm 2908
-VMINCPSrr 2909
-VMINCSDZrm 2910
-VMINCSDZrr 2911
-VMINCSDrm 2912
-VMINCSDrr 2913
-VMINCSHZrm 2914
-VMINCSHZrr 2915
-VMINCSSZrm 2916
-VMINCSSZrr 2917
-VMINCSSrm 2918
-VMINCSSrr 2919
-VMINMAXBF 2920
-VMINMAXPDZ 2921
-VMINMAXPDZrmbi 2922
-VMINMAXPDZrmbik 2923
-VMINMAXPDZrmbikz 2924
-VMINMAXPDZrmi 2925
-VMINMAXPDZrmik 2926
-VMINMAXPDZrmikz 2927
-VMINMAXPDZrri 2928
-VMINMAXPDZrrib 2929
-VMINMAXPDZrribk 2930
-VMINMAXPDZrribkz 2931
-VMINMAXPDZrrik 2932
-VMINMAXPDZrrikz 2933
-VMINMAXPHZ 2934
-VMINMAXPHZrmbi 2935
-VMINMAXPHZrmbik 2936
-VMINMAXPHZrmbikz 2937
-VMINMAXPHZrmi 2938
-VMINMAXPHZrmik 2939
-VMINMAXPHZrmikz 2940
-VMINMAXPHZrri 2941
-VMINMAXPHZrrib 2942
-VMINMAXPHZrribk 2943
-VMINMAXPHZrribkz 2944
-VMINMAXPHZrrik 2945
-VMINMAXPHZrrikz 2946
-VMINMAXPSZ 2947
-VMINMAXPSZrmbi 2948
-VMINMAXPSZrmbik 2949
-VMINMAXPSZrmbikz 2950
-VMINMAXPSZrmi 2951
-VMINMAXPSZrmik 2952
-VMINMAXPSZrmikz 2953
-VMINMAXPSZrri 2954
-VMINMAXPSZrrib 2955
-VMINMAXPSZrribk 2956
-VMINMAXPSZrribkz 2957
-VMINMAXPSZrrik 2958
-VMINMAXPSZrrikz 2959
-VMINMAXSDrmi 2960
-VMINMAXSDrmi_Int 2961
-VMINMAXSDrmik_Int 2962
-VMINMAXSDrmikz_Int 2963
-VMINMAXSDrri 2964
-VMINMAXSDrri_Int 2965
-VMINMAXSDrrib_Int 2966
-VMINMAXSDrribk_Int 2967
-VMINMAXSDrribkz_Int 2968
-VMINMAXSDrrik_Int 2969
-VMINMAXSDrrikz_Int 2970
-VMINMAXSHrmi 2971
-VMINMAXSHrmi_Int 2972
-VMINMAXSHrmik_Int 2973
-VMINMAXSHrmikz_Int 2974
-VMINMAXSHrri 2975
-VMINMAXSHrri_Int 2976
-VMINMAXSHrrib_Int 2977
-VMINMAXSHrribk_Int 2978
-VMINMAXSHrribkz_Int 2979
-VMINMAXSHrrik_Int 2980
-VMINMAXSHrrikz_Int 2981
-VMINMAXSSrmi 2982
-VMINMAXSSrmi_Int 2983
-VMINMAXSSrmik_Int 2984
-VMINMAXSSrmikz_Int 2985
-VMINMAXSSrri 2986
-VMINMAXSSrri_Int 2987
-VMINMAXSSrrib_Int 2988
-VMINMAXSSrribk_Int 2989
-VMINMAXSSrribkz_Int 2990
-VMINMAXSSrrik_Int 2991
-VMINMAXSSrrikz_Int 2992
-VMINPDYrm 2993
-VMINPDYrr 2994
-VMINPDZ 2995
-VMINPDZrm 2996
-VMINPDZrmb 2997
-VMINPDZrmbk 2998
-VMINPDZrmbkz 2999
-VMINPDZrmk 3000
-VMINPDZrmkz 3001
-VMINPDZrr 3002
-VMINPDZrrb 3003
-VMINPDZrrbk 3004
-VMINPDZrrbkz 3005
-VMINPDZrrk 3006
-VMINPDZrrkz 3007
-VMINPDrm 3008
-VMINPDrr 3009
-VMINPHZ 3010
-VMINPHZrm 3011
-VMINPHZrmb 3012
-VMINPHZrmbk 3013
-VMINPHZrmbkz 3014
-VMINPHZrmk 3015
-VMINPHZrmkz 3016
-VMINPHZrr 3017
-VMINPHZrrb 3018
-VMINPHZrrbk 3019
-VMINPHZrrbkz 3020
-VMINPHZrrk 3021
-VMINPHZrrkz 3022
-VMINPSYrm 3023
-VMINPSYrr 3024
-VMINPSZ 3025
-VMINPSZrm 3026
-VMINPSZrmb 3027
-VMINPSZrmbk 3028
-VMINPSZrmbkz 3029
-VMINPSZrmk 3030
-VMINPSZrmkz 3031
-VMINPSZrr 3032
-VMINPSZrrb 3033
-VMINPSZrrbk 3034
-VMINPSZrrbkz 3035
-VMINPSZrrk 3036
-VMINPSZrrkz 3037
-VMINPSrm 3038
-VMINPSrr 3039
-VMINSDZrm 3040
-VMINSDZrm_Int 3041
-VMINSDZrmk_Int 3042
-VMINSDZrmkz_Int 3043
-VMINSDZrr 3044
-VMINSDZrr_Int 3045
-VMINSDZrrb_Int 3046
-VMINSDZrrbk_Int 3047
-VMINSDZrrbkz_Int 3048
-VMINSDZrrk_Int 3049
-VMINSDZrrkz_Int 3050
-VMINSDrm 3051
-VMINSDrm_Int 3052
-VMINSDrr 3053
-VMINSDrr_Int 3054
-VMINSHZrm 3055
-VMINSHZrm_Int 3056
-VMINSHZrmk_Int 3057
-VMINSHZrmkz_Int 3058
-VMINSHZrr 3059
-VMINSHZrr_Int 3060
-VMINSHZrrb_Int 3061
-VMINSHZrrbk_Int 3062
-VMINSHZrrbkz_Int 3063
-VMINSHZrrk_Int 3064
-VMINSHZrrkz_Int 3065
-VMINSSZrm 3066
-VMINSSZrm_Int 3067
-VMINSSZrmk_Int 3068
-VMINSSZrmkz_Int 3069
-VMINSSZrr 3070
-VMINSSZrr_Int 3071
-VMINSSZrrb_Int 3072
-VMINSSZrrbk_Int 3073
-VMINSSZrrbkz_Int 3074
-VMINSSZrrk_Int 3075
-VMINSSZrrkz_Int 3076
-VMINSSrm 3077
-VMINSSrm_Int 3078
-VMINSSrr 3079
-VMINSSrr_Int 3080
-VMLAUNCH 3081
-VMLOAD 3082
-VMMCALL 3083
-VMOV 3084
-VMOVAPDYmr 3085
-VMOVAPDYrm 3086
-VMOVAPDYrr 3087
-VMOVAPDYrr_REV 3088
-VMOVAPDZ 3089
-VMOVAPDZmr 3090
-VMOVAPDZmrk 3091
-VMOVAPDZrm 3092
-VMOVAPDZrmk 3093
-VMOVAPDZrmkz 3094
-VMOVAPDZrr 3095
-VMOVAPDZrr_REV 3096
-VMOVAPDZrrk 3097
-VMOVAPDZrrk_REV 3098
-VMOVAPDZrrkz 3099
-VMOVAPDZrrkz_REV 3100
-VMOVAPDmr 3101
-VMOVAPDrm 3102
-VMOVAPDrr 3103
-VMOVAPDrr_REV 3104
-VMOVAPSYmr 3105
-VMOVAPSYrm 3106
-VMOVAPSYrr 3107
-VMOVAPSYrr_REV 3108
-VMOVAPSZ 3109
-VMOVAPSZmr 3110
-VMOVAPSZmrk 3111
-VMOVAPSZrm 3112
-VMOVAPSZrmk 3113
-VMOVAPSZrmkz 3114
-VMOVAPSZrr 3115
-VMOVAPSZrr_REV 3116
-VMOVAPSZrrk 3117
-VMOVAPSZrrk_REV 3118
-VMOVAPSZrrkz 3119
-VMOVAPSZrrkz_REV 3120
-VMOVAPSmr 3121
-VMOVAPSrm 3122
-VMOVAPSrr 3123
-VMOVAPSrr_REV 3124
-VMOVDDUPYrm 3125
-VMOVDDUPYrr 3126
-VMOVDDUPZ 3127
-VMOVDDUPZrm 3128
-VMOVDDUPZrmk 3129
-VMOVDDUPZrmkz 3130
-VMOVDDUPZrr 3131
-VMOVDDUPZrrk 3132
-VMOVDDUPZrrkz 3133
-VMOVDDUPrm 3134
-VMOVDDUPrr 3135
-VMOVDI 3136
-VMOVDQA 3137
-VMOVDQAYmr 3138
-VMOVDQAYrm 3139
-VMOVDQAYrr 3140
-VMOVDQAYrr_REV 3141
-VMOVDQAmr 3142
-VMOVDQArm 3143
-VMOVDQArr 3144
-VMOVDQArr_REV 3145
-VMOVDQU 3146
-VMOVDQUYmr 3147
-VMOVDQUYrm 3148
-VMOVDQUYrr 3149
-VMOVDQUYrr_REV 3150
-VMOVDQUmr 3151
-VMOVDQUrm 3152
-VMOVDQUrr 3153
-VMOVDQUrr_REV 3154
-VMOVHLPSZrr 3155
-VMOVHLPSrr 3156
-VMOVHPDZ 3157
-VMOVHPDmr 3158
-VMOVHPDrm 3159
-VMOVHPSZ 3160
-VMOVHPSmr 3161
-VMOVHPSrm 3162
-VMOVLHPSZrr 3163
-VMOVLHPSrr 3164
-VMOVLPDZ 3165
-VMOVLPDmr 3166
-VMOVLPDrm 3167
-VMOVLPSZ 3168
-VMOVLPSmr 3169
-VMOVLPSrm 3170
-VMOVMSKPDYrr 3171
-VMOVMSKPDrr 3172
-VMOVMSKPSYrr 3173
-VMOVMSKPSrr 3174
-VMOVNTDQAYrm 3175
-VMOVNTDQAZ 3176
-VMOVNTDQAZrm 3177
-VMOVNTDQArm 3178
-VMOVNTDQYmr 3179
-VMOVNTDQZ 3180
-VMOVNTDQZmr 3181
-VMOVNTDQmr 3182
-VMOVNTPDYmr 3183
-VMOVNTPDZ 3184
-VMOVNTPDZmr 3185
-VMOVNTPDmr 3186
-VMOVNTPSYmr 3187
-VMOVNTPSZ 3188
-VMOVNTPSZmr 3189
-VMOVNTPSmr 3190
-VMOVPDI 3191
-VMOVPQI 3192
-VMOVPQIto 3193
-VMOVQI 3194
-VMOVRSBZ 3195
-VMOVRSBZm 3196
-VMOVRSBZmk 3197
-VMOVRSBZmkz 3198
-VMOVRSDZ 3199
-VMOVRSDZm 3200
-VMOVRSDZmk 3201
-VMOVRSDZmkz 3202
-VMOVRSQZ 3203
-VMOVRSQZm 3204
-VMOVRSQZmk 3205
-VMOVRSQZmkz 3206
-VMOVRSWZ 3207
-VMOVRSWZm 3208
-VMOVRSWZmk 3209
-VMOVRSWZmkz 3210
-VMOVSDZmr 3211
-VMOVSDZmrk 3212
-VMOVSDZrm 3213
-VMOVSDZrm_alt 3214
-VMOVSDZrmk 3215
-VMOVSDZrmkz 3216
-VMOVSDZrr 3217
-VMOVSDZrr_REV 3218
-VMOVSDZrrk 3219
-VMOVSDZrrk_REV 3220
-VMOVSDZrrkz 3221
-VMOVSDZrrkz_REV 3222
-VMOVSDmr 3223
-VMOVSDrm 3224
-VMOVSDrm_alt 3225
-VMOVSDrr 3226
-VMOVSDrr_REV 3227
-VMOVSDto 3228
-VMOVSH 3229
-VMOVSHDUPYrm 3230
-VMOVSHDUPYrr 3231
-VMOVSHDUPZ 3232
-VMOVSHDUPZrm 3233
-VMOVSHDUPZrmk 3234
-VMOVSHDUPZrmkz 3235
-VMOVSHDUPZrr 3236
-VMOVSHDUPZrrk 3237
-VMOVSHDUPZrrkz 3238
-VMOVSHDUPrm 3239
-VMOVSHDUPrr 3240
-VMOVSHZmr 3241
-VMOVSHZmrk 3242
-VMOVSHZrm 3243
-VMOVSHZrm_alt 3244
-VMOVSHZrmk 3245
-VMOVSHZrmkz 3246
-VMOVSHZrr 3247
-VMOVSHZrr_REV 3248
-VMOVSHZrrk 3249
-VMOVSHZrrk_REV 3250
-VMOVSHZrrkz 3251
-VMOVSHZrrkz_REV 3252
-VMOVSHtoW 3253
-VMOVSLDUPYrm 3254
-VMOVSLDUPYrr 3255
-VMOVSLDUPZ 3256
-VMOVSLDUPZrm 3257
-VMOVSLDUPZrmk 3258
-VMOVSLDUPZrmkz 3259
-VMOVSLDUPZrr 3260
-VMOVSLDUPZrrk 3261
-VMOVSLDUPZrrkz 3262
-VMOVSLDUPrm 3263
-VMOVSLDUPrr 3264
-VMOVSS 3265
-VMOVSSZmr 3266
-VMOVSSZmrk 3267
-VMOVSSZrm 3268
-VMOVSSZrm_alt 3269
-VMOVSSZrmk 3270
-VMOVSSZrmkz 3271
-VMOVSSZrr 3272
-VMOVSSZrr_REV 3273
-VMOVSSZrrk 3274
-VMOVSSZrrk_REV 3275
-VMOVSSZrrkz 3276
-VMOVSSZrrkz_REV 3277
-VMOVSSmr 3278
-VMOVSSrm 3279
-VMOVSSrm_alt 3280
-VMOVSSrr 3281
-VMOVSSrr_REV 3282
-VMOVUPDYmr 3283
-VMOVUPDYrm 3284
-VMOVUPDYrr 3285
-VMOVUPDYrr_REV 3286
-VMOVUPDZ 3287
-VMOVUPDZmr 3288
-VMOVUPDZmrk 3289
-VMOVUPDZrm 3290
-VMOVUPDZrmk 3291
-VMOVUPDZrmkz 3292
-VMOVUPDZrr 3293
-VMOVUPDZrr_REV 3294
-VMOVUPDZrrk 3295
-VMOVUPDZrrk_REV 3296
-VMOVUPDZrrkz 3297
-VMOVUPDZrrkz_REV 3298
-VMOVUPDmr 3299
-VMOVUPDrm 3300
-VMOVUPDrr 3301
-VMOVUPDrr_REV 3302
-VMOVUPSYmr 3303
-VMOVUPSYrm 3304
-VMOVUPSYrr 3305
-VMOVUPSYrr_REV 3306
-VMOVUPSZ 3307
-VMOVUPSZmr 3308
-VMOVUPSZmrk 3309
-VMOVUPSZrm 3310
-VMOVUPSZrmk 3311
-VMOVUPSZrmkz 3312
-VMOVUPSZrr 3313
-VMOVUPSZrr_REV 3314
-VMOVUPSZrrk 3315
-VMOVUPSZrrk_REV 3316
-VMOVUPSZrrkz 3317
-VMOVUPSZrrkz_REV 3318
-VMOVUPSmr 3319
-VMOVUPSrm 3320
-VMOVUPSrr 3321
-VMOVUPSrr_REV 3322
-VMOVW 3323
-VMOVWmr 3324
-VMOVWrm 3325
-VMOVZPDILo 3326
-VMOVZPQILo 3327
-VMOVZPWILo 3328
-VMPSADBWYrmi 3329
-VMPSADBWYrri 3330
-VMPSADBWZ 3331
-VMPSADBWZrmi 3332
-VMPSADBWZrmik 3333
-VMPSADBWZrmikz 3334
-VMPSADBWZrri 3335
-VMPSADBWZrrik 3336
-VMPSADBWZrrikz 3337
-VMPSADBWrmi 3338
-VMPSADBWrri 3339
-VMPTRLDm 3340
-VMPTRSTm 3341
-VMREAD 3342
-VMRESUME 3343
-VMRUN 3344
-VMSAVE 3345
-VMULBF 3346
-VMULPDYrm 3347
-VMULPDYrr 3348
-VMULPDZ 3349
-VMULPDZrm 3350
-VMULPDZrmb 3351
-VMULPDZrmbk 3352
-VMULPDZrmbkz 3353
-VMULPDZrmk 3354
-VMULPDZrmkz 3355
-VMULPDZrr 3356
-VMULPDZrrb 3357
-VMULPDZrrbk 3358
-VMULPDZrrbkz 3359
-VMULPDZrrk 3360
-VMULPDZrrkz 3361
-VMULPDrm 3362
-VMULPDrr 3363
-VMULPHZ 3364
-VMULPHZrm 3365
-VMULPHZrmb 3366
-VMULPHZrmbk 3367
-VMULPHZrmbkz 3368
-VMULPHZrmk 3369
-VMULPHZrmkz 3370
-VMULPHZrr 3371
-VMULPHZrrb 3372
-VMULPHZrrbk 3373
-VMULPHZrrbkz 3374
-VMULPHZrrk 3375
-VMULPHZrrkz 3376
-VMULPSYrm 3377
-VMULPSYrr 3378
-VMULPSZ 3379
-VMULPSZrm 3380
-VMULPSZrmb 3381
-VMULPSZrmbk 3382
-VMULPSZrmbkz 3383
-VMULPSZrmk 3384
-VMULPSZrmkz 3385
-VMULPSZrr 3386
-VMULPSZrrb 3387
-VMULPSZrrbk 3388
-VMULPSZrrbkz 3389
-VMULPSZrrk 3390
-VMULPSZrrkz 3391
-VMULPSrm 3392
-VMULPSrr 3393
-VMULSDZrm 3394
-VMULSDZrm_Int 3395
-VMULSDZrmk_Int 3396
-VMULSDZrmkz_Int 3397
-VMULSDZrr 3398
-VMULSDZrr_Int 3399
-VMULSDZrrb_Int 3400
-VMULSDZrrbk_Int 3401
-VMULSDZrrbkz_Int 3402
-VMULSDZrrk_Int 3403
-VMULSDZrrkz_Int 3404
-VMULSDrm 3405
-VMULSDrm_Int 3406
-VMULSDrr 3407
-VMULSDrr_Int 3408
-VMULSHZrm 3409
-VMULSHZrm_Int 3410
-VMULSHZrmk_Int 3411
-VMULSHZrmkz_Int 3412
-VMULSHZrr 3413
-VMULSHZrr_Int 3414
-VMULSHZrrb_Int 3415
-VMULSHZrrbk_Int 3416
-VMULSHZrrbkz_Int 3417
-VMULSHZrrk_Int 3418
-VMULSHZrrkz_Int 3419
-VMULSSZrm 3420
-VMULSSZrm_Int 3421
-VMULSSZrmk_Int 3422
-VMULSSZrmkz_Int 3423
-VMULSSZrr 3424
-VMULSSZrr_Int 3425
-VMULSSZrrb_Int 3426
-VMULSSZrrbk_Int 3427
-VMULSSZrrbkz_Int 3428
-VMULSSZrrk_Int 3429
-VMULSSZrrkz_Int 3430
-VMULSSrm 3431
-VMULSSrm_Int 3432
-VMULSSrr 3433
-VMULSSrr_Int 3434
-VMWRITE 3435
-VMXOFF 3436
-VMXON 3437
-VORPDYrm 3438
-VORPDYrr 3439
-VORPDZ 3440
-VORPDZrm 3441
-VORPDZrmb 3442
-VORPDZrmbk 3443
-VORPDZrmbkz 3444
-VORPDZrmk 3445
-VORPDZrmkz 3446
-VORPDZrr 3447
-VORPDZrrk 3448
-VORPDZrrkz 3449
-VORPDrm 3450
-VORPDrr 3451
-VORPSYrm 3452
-VORPSYrr 3453
-VORPSZ 3454
-VORPSZrm 3455
-VORPSZrmb 3456
-VORPSZrmbk 3457
-VORPSZrmbkz 3458
-VORPSZrmk 3459
-VORPSZrmkz 3460
-VORPSZrr 3461
-VORPSZrrk 3462
-VORPSZrrkz 3463
-VORPSrm 3464
-VORPSrr 3465
-VP 3466
-VPABSBYrm 3467
-VPABSBYrr 3468
-VPABSBZ 3469
-VPABSBZrm 3470
-VPABSBZrmk 3471
-VPABSBZrmkz 3472
-VPABSBZrr 3473
-VPABSBZrrk 3474
-VPABSBZrrkz 3475
-VPABSBrm 3476
-VPABSBrr 3477
-VPABSDYrm 3478
-VPABSDYrr 3479
-VPABSDZ 3480
-VPABSDZrm 3481
-VPABSDZrmb 3482
-VPABSDZrmbk 3483
-VPABSDZrmbkz 3484
-VPABSDZrmk 3485
-VPABSDZrmkz 3486
-VPABSDZrr 3487
-VPABSDZrrk 3488
-VPABSDZrrkz 3489
-VPABSDrm 3490
-VPABSDrr 3491
-VPABSQZ 3492
-VPABSQZrm 3493
-VPABSQZrmb 3494
-VPABSQZrmbk 3495
-VPABSQZrmbkz 3496
-VPABSQZrmk 3497
-VPABSQZrmkz 3498
-VPABSQZrr 3499
-VPABSQZrrk 3500
-VPABSQZrrkz 3501
-VPABSWYrm 3502
-VPABSWYrr 3503
-VPABSWZ 3504
-VPABSWZrm 3505
-VPABSWZrmk 3506
-VPABSWZrmkz 3507
-VPABSWZrr 3508
-VPABSWZrrk 3509
-VPABSWZrrkz 3510
-VPABSWrm 3511
-VPABSWrr 3512
-VPACKSSDWYrm 3513
-VPACKSSDWYrr 3514
-VPACKSSDWZ 3515
-VPACKSSDWZrm 3516
-VPACKSSDWZrmb 3517
-VPACKSSDWZrmbk 3518
-VPACKSSDWZrmbkz 3519
-VPACKSSDWZrmk 3520
-VPACKSSDWZrmkz 3521
-VPACKSSDWZrr 3522
-VPACKSSDWZrrk 3523
-VPACKSSDWZrrkz 3524
-VPACKSSDWrm 3525
-VPACKSSDWrr 3526
-VPACKSSWBYrm 3527
-VPACKSSWBYrr 3528
-VPACKSSWBZ 3529
-VPACKSSWBZrm 3530
-VPACKSSWBZrmk 3531
-VPACKSSWBZrmkz 3532
-VPACKSSWBZrr 3533
-VPACKSSWBZrrk 3534
-VPACKSSWBZrrkz 3535
-VPACKSSWBrm 3536
-VPACKSSWBrr 3537
-VPACKUSDWYrm 3538
-VPACKUSDWYrr 3539
-VPACKUSDWZ 3540
-VPACKUSDWZrm 3541
-VPACKUSDWZrmb 3542
-VPACKUSDWZrmbk 3543
-VPACKUSDWZrmbkz 3544
-VPACKUSDWZrmk 3545
-VPACKUSDWZrmkz 3546
-VPACKUSDWZrr 3547
-VPACKUSDWZrrk 3548
-VPACKUSDWZrrkz 3549
-VPACKUSDWrm 3550
-VPACKUSDWrr 3551
-VPACKUSWBYrm 3552
-VPACKUSWBYrr 3553
-VPACKUSWBZ 3554
-VPACKUSWBZrm 3555
-VPACKUSWBZrmk 3556
-VPACKUSWBZrmkz 3557
-VPACKUSWBZrr 3558
-VPACKUSWBZrrk 3559
-VPACKUSWBZrrkz 3560
-VPACKUSWBrm 3561
-VPACKUSWBrr 3562
-VPADDBYrm 3563
-VPADDBYrr 3564
-VPADDBZ 3565
-VPADDBZrm 3566
-VPADDBZrmk 3567
-VPADDBZrmkz 3568
-VPADDBZrr 3569
-VPADDBZrrk 3570
-VPADDBZrrkz 3571
-VPADDBrm 3572
-VPADDBrr 3573
-VPADDDYrm 3574
-VPADDDYrr 3575
-VPADDDZ 3576
-VPADDDZrm 3577
-VPADDDZrmb 3578
-VPADDDZrmbk 3579
-VPADDDZrmbkz 3580
-VPADDDZrmk 3581
-VPADDDZrmkz 3582
-VPADDDZrr 3583
-VPADDDZrrk 3584
-VPADDDZrrkz 3585
-VPADDDrm 3586
-VPADDDrr 3587
-VPADDQYrm 3588
-VPADDQYrr 3589
-VPADDQZ 3590
-VPADDQZrm 3591
-VPADDQZrmb 3592
-VPADDQZrmbk 3593
-VPADDQZrmbkz 3594
-VPADDQZrmk 3595
-VPADDQZrmkz 3596
-VPADDQZrr 3597
-VPADDQZrrk 3598
-VPADDQZrrkz 3599
-VPADDQrm 3600
-VPADDQrr 3601
-VPADDSBYrm 3602
-VPADDSBYrr 3603
-VPADDSBZ 3604
-VPADDSBZrm 3605
-VPADDSBZrmk 3606
-VPADDSBZrmkz 3607
-VPADDSBZrr 3608
-VPADDSBZrrk 3609
-VPADDSBZrrkz 3610
-VPADDSBrm 3611
-VPADDSBrr 3612
-VPADDSWYrm 3613
-VPADDSWYrr 3614
-VPADDSWZ 3615
-VPADDSWZrm 3616
-VPADDSWZrmk 3617
-VPADDSWZrmkz 3618
-VPADDSWZrr 3619
-VPADDSWZrrk 3620
-VPADDSWZrrkz 3621
-VPADDSWrm 3622
-VPADDSWrr 3623
-VPADDUSBYrm 3624
-VPADDUSBYrr 3625
-VPADDUSBZ 3626
-VPADDUSBZrm 3627
-VPADDUSBZrmk 3628
-VPADDUSBZrmkz 3629
-VPADDUSBZrr 3630
-VPADDUSBZrrk 3631
-VPADDUSBZrrkz 3632
-VPADDUSBrm 3633
-VPADDUSBrr 3634
-VPADDUSWYrm 3635
-VPADDUSWYrr 3636
-VPADDUSWZ 3637
-VPADDUSWZrm 3638
-VPADDUSWZrmk 3639
-VPADDUSWZrmkz 3640
-VPADDUSWZrr 3641
-VPADDUSWZrrk 3642
-VPADDUSWZrrkz 3643
-VPADDUSWrm 3644
-VPADDUSWrr 3645
-VPADDWYrm 3646
-VPADDWYrr 3647
-VPADDWZ 3648
-VPADDWZrm 3649
-VPADDWZrmk 3650
-VPADDWZrmkz 3651
-VPADDWZrr 3652
-VPADDWZrrk 3653
-VPADDWZrrkz 3654
-VPADDWrm 3655
-VPADDWrr 3656
-VPALIGNRYrmi 3657
-VPALIGNRYrri 3658
-VPALIGNRZ 3659
-VPALIGNRZrmi 3660
-VPALIGNRZrmik 3661
-VPALIGNRZrmikz 3662
-VPALIGNRZrri 3663
-VPALIGNRZrrik 3664
-VPALIGNRZrrikz 3665
-VPALIGNRrmi 3666
-VPALIGNRrri 3667
-VPANDDZ 3668
-VPANDDZrm 3669
-VPANDDZrmb 3670
-VPANDDZrmbk 3671
-VPANDDZrmbkz 3672
-VPANDDZrmk 3673
-VPANDDZrmkz 3674
-VPANDDZrr 3675
-VPANDDZrrk 3676
-VPANDDZrrkz 3677
-VPANDNDZ 3678
-VPANDNDZrm 3679
-VPANDNDZrmb 3680
-VPANDNDZrmbk 3681
-VPANDNDZrmbkz 3682
-VPANDNDZrmk 3683
-VPANDNDZrmkz 3684
-VPANDNDZrr 3685
-VPANDNDZrrk 3686
-VPANDNDZrrkz 3687
-VPANDNQZ 3688
-VPANDNQZrm 3689
-VPANDNQZrmb 3690
-VPANDNQZrmbk 3691
-VPANDNQZrmbkz 3692
-VPANDNQZrmk 3693
-VPANDNQZrmkz 3694
-VPANDNQZrr 3695
-VPANDNQZrrk 3696
-VPANDNQZrrkz 3697
-VPANDNYrm 3698
-VPANDNYrr 3699
-VPANDNrm 3700
-VPANDNrr 3701
-VPANDQZ 3702
-VPANDQZrm 3703
-VPANDQZrmb 3704
-VPANDQZrmbk 3705
-VPANDQZrmbkz 3706
-VPANDQZrmk 3707
-VPANDQZrmkz 3708
-VPANDQZrr 3709
-VPANDQZrrk 3710
-VPANDQZrrkz 3711
-VPANDYrm 3712
-VPANDYrr 3713
-VPANDrm 3714
-VPANDrr 3715
-VPAVGBYrm 3716
-VPAVGBYrr 3717
-VPAVGBZ 3718
-VPAVGBZrm 3719
-VPAVGBZrmk 3720
-VPAVGBZrmkz 3721
-VPAVGBZrr 3722
-VPAVGBZrrk 3723
-VPAVGBZrrkz 3724
-VPAVGBrm 3725
-VPAVGBrr 3726
-VPAVGWYrm 3727
-VPAVGWYrr 3728
-VPAVGWZ 3729
-VPAVGWZrm 3730
-VPAVGWZrmk 3731
-VPAVGWZrmkz 3732
-VPAVGWZrr 3733
-VPAVGWZrrk 3734
-VPAVGWZrrkz 3735
-VPAVGWrm 3736
-VPAVGWrr 3737
-VPBLENDDYrmi 3738
-VPBLENDDYrri 3739
-VPBLENDDrmi 3740
-VPBLENDDrri 3741
-VPBLENDMBZ 3742
-VPBLENDMBZrm 3743
-VPBLENDMBZrmk 3744
-VPBLENDMBZrmkz 3745
-VPBLENDMBZrr 3746
-VPBLENDMBZrrk 3747
-VPBLENDMBZrrkz 3748
-VPBLENDMDZ 3749
-VPBLENDMDZrm 3750
-VPBLENDMDZrmb 3751
-VPBLENDMDZrmbk 3752
-VPBLENDMDZrmbkz 3753
-VPBLENDMDZrmk 3754
-VPBLENDMDZrmkz 3755
-VPBLENDMDZrr 3756
-VPBLENDMDZrrk 3757
-VPBLENDMDZrrkz 3758
-VPBLENDMQZ 3759
-VPBLENDMQZrm 3760
-VPBLENDMQZrmb 3761
-VPBLENDMQZrmbk 3762
-VPBLENDMQZrmbkz 3763
-VPBLENDMQZrmk 3764
-VPBLENDMQZrmkz 3765
-VPBLENDMQZrr 3766
-VPBLENDMQZrrk 3767
-VPBLENDMQZrrkz 3768
-VPBLENDMWZ 3769
-VPBLENDMWZrm 3770
-VPBLENDMWZrmk 3771
-VPBLENDMWZrmkz 3772
-VPBLENDMWZrr 3773
-VPBLENDMWZrrk 3774
-VPBLENDMWZrrkz 3775
-VPBLENDVBYrmr 3776
-VPBLENDVBYrrr 3777
-VPBLENDVBrmr 3778
-VPBLENDVBrrr 3779
-VPBLENDWYrmi 3780
-VPBLENDWYrri 3781
-VPBLENDWrmi 3782
-VPBLENDWrri 3783
-VPBROADCASTBYrm 3784
-VPBROADCASTBYrr 3785
-VPBROADCASTBZ 3786
-VPBROADCASTBZrm 3787
-VPBROADCASTBZrmk 3788
-VPBROADCASTBZrmkz 3789
-VPBROADCASTBZrr 3790
-VPBROADCASTBZrrk 3791
-VPBROADCASTBZrrkz 3792
-VPBROADCASTBrZ 3793
-VPBROADCASTBrZrr 3794
-VPBROADCASTBrZrrk 3795
-VPBROADCASTBrZrrkz 3796
-VPBROADCASTBrm 3797
-VPBROADCASTBrr 3798
-VPBROADCASTDYrm 3799
-VPBROADCASTDYrr 3800
-VPBROADCASTDZ 3801
-VPBROADCASTDZrm 3802
-VPBROADCASTDZrmk 3803
-VPBROADCASTDZrmkz 3804
-VPBROADCASTDZrr 3805
-VPBROADCASTDZrrk 3806
-VPBROADCASTDZrrkz 3807
-VPBROADCASTDrZ 3808
-VPBROADCASTDrZrr 3809
-VPBROADCASTDrZrrk 3810
-VPBROADCASTDrZrrkz 3811
-VPBROADCASTDrm 3812
-VPBROADCASTDrr 3813
-VPBROADCASTMB 3814
-VPBROADCASTMW 3815
-VPBROADCASTQYrm 3816
-VPBROADCASTQYrr 3817
-VPBROADCASTQZ 3818
-VPBROADCASTQZrm 3819
-VPBROADCASTQZrmk 3820
-VPBROADCASTQZrmkz 3821
-VPBROADCASTQZrr 3822
-VPBROADCASTQZrrk 3823
-VPBROADCASTQZrrkz 3824
-VPBROADCASTQrZ 3825
-VPBROADCASTQrZrr 3826
-VPBROADCASTQrZrrk 3827
-VPBROADCASTQrZrrkz 3828
-VPBROADCASTQrm 3829
-VPBROADCASTQrr 3830
-VPBROADCASTWYrm 3831
-VPBROADCASTWYrr 3832
-VPBROADCASTWZ 3833
-VPBROADCASTWZrm 3834
-VPBROADCASTWZrmk 3835
-VPBROADCASTWZrmkz 3836
-VPBROADCASTWZrr 3837
-VPBROADCASTWZrrk 3838
-VPBROADCASTWZrrkz 3839
-VPBROADCASTWrZ 3840
-VPBROADCASTWrZrr 3841
-VPBROADCASTWrZrrk 3842
-VPBROADCASTWrZrrkz 3843
-VPBROADCASTWrm 3844
-VPBROADCASTWrr 3845
-VPCLMULQDQYrmi 3846
-VPCLMULQDQYrri 3847
-VPCLMULQDQZ 3848
-VPCLMULQDQZrmi 3849
-VPCLMULQDQZrri 3850
-VPCLMULQDQrmi 3851
-VPCLMULQDQrri 3852
-VPCMOVYrmr 3853
-VPCMOVYrrm 3854
-VPCMOVYrrr 3855
-VPCMOVYrrr_REV 3856
-VPCMOVrmr 3857
-VPCMOVrrm 3858
-VPCMOVrrr 3859
-VPCMOVrrr_REV 3860
-VPCMPBZ 3861
-VPCMPBZrmi 3862
-VPCMPBZrmik 3863
-VPCMPBZrri 3864
-VPCMPBZrrik 3865
-VPCMPDZ 3866
-VPCMPDZrmbi 3867
-VPCMPDZrmbik 3868
-VPCMPDZrmi 3869
-VPCMPDZrmik 3870
-VPCMPDZrri 3871
-VPCMPDZrrik 3872
-VPCMPEQBYrm 3873
-VPCMPEQBYrr 3874
-VPCMPEQBZ 3875
-VPCMPEQBZrm 3876
-VPCMPEQBZrmk 3877
-VPCMPEQBZrr 3878
-VPCMPEQBZrrk 3879
-VPCMPEQBrm 3880
-VPCMPEQBrr 3881
-VPCMPEQDYrm 3882
-VPCMPEQDYrr 3883
-VPCMPEQDZ 3884
-VPCMPEQDZrm 3885
-VPCMPEQDZrmb 3886
-VPCMPEQDZrmbk 3887
-VPCMPEQDZrmk 3888
-VPCMPEQDZrr 3889
-VPCMPEQDZrrk 3890
-VPCMPEQDrm 3891
-VPCMPEQDrr 3892
-VPCMPEQQYrm 3893
-VPCMPEQQYrr 3894
-VPCMPEQQZ 3895
-VPCMPEQQZrm 3896
-VPCMPEQQZrmb 3897
-VPCMPEQQZrmbk 3898
-VPCMPEQQZrmk 3899
-VPCMPEQQZrr 3900
-VPCMPEQQZrrk 3901
-VPCMPEQQrm 3902
-VPCMPEQQrr 3903
-VPCMPEQWYrm 3904
-VPCMPEQWYrr 3905
-VPCMPEQWZ 3906
-VPCMPEQWZrm 3907
-VPCMPEQWZrmk 3908
-VPCMPEQWZrr 3909
-VPCMPEQWZrrk 3910
-VPCMPEQWrm 3911
-VPCMPEQWrr 3912
-VPCMPESTRIrmi 3913
-VPCMPESTRIrri 3914
-VPCMPESTRMrmi 3915
-VPCMPESTRMrri 3916
-VPCMPGTBYrm 3917
-VPCMPGTBYrr 3918
-VPCMPGTBZ 3919
-VPCMPGTBZrm 3920
-VPCMPGTBZrmk 3921
-VPCMPGTBZrr 3922
-VPCMPGTBZrrk 3923
-VPCMPGTBrm 3924
-VPCMPGTBrr 3925
-VPCMPGTDYrm 3926
-VPCMPGTDYrr 3927
-VPCMPGTDZ 3928
-VPCMPGTDZrm 3929
-VPCMPGTDZrmb 3930
-VPCMPGTDZrmbk 3931
-VPCMPGTDZrmk 3932
-VPCMPGTDZrr 3933
-VPCMPGTDZrrk 3934
-VPCMPGTDrm 3935
-VPCMPGTDrr 3936
-VPCMPGTQYrm 3937
-VPCMPGTQYrr 3938
-VPCMPGTQZ 3939
-VPCMPGTQZrm 3940
-VPCMPGTQZrmb 3941
-VPCMPGTQZrmbk 3942
-VPCMPGTQZrmk 3943
-VPCMPGTQZrr 3944
-VPCMPGTQZrrk 3945
-VPCMPGTQrm 3946
-VPCMPGTQrr 3947
-VPCMPGTWYrm 3948
-VPCMPGTWYrr 3949
-VPCMPGTWZ 3950
-VPCMPGTWZrm 3951
-VPCMPGTWZrmk 3952
-VPCMPGTWZrr 3953
-VPCMPGTWZrrk 3954
-VPCMPGTWrm 3955
-VPCMPGTWrr 3956
-VPCMPISTRIrmi 3957
-VPCMPISTRIrri 3958
-VPCMPISTRMrmi 3959
-VPCMPISTRMrri 3960
-VPCMPQZ 3961
-VPCMPQZrmbi 3962
-VPCMPQZrmbik 3963
-VPCMPQZrmi 3964
-VPCMPQZrmik 3965
-VPCMPQZrri 3966
-VPCMPQZrrik 3967
-VPCMPUBZ 3968
-VPCMPUBZrmi 3969
-VPCMPUBZrmik 3970
-VPCMPUBZrri 3971
-VPCMPUBZrrik 3972
-VPCMPUDZ 3973
-VPCMPUDZrmbi 3974
-VPCMPUDZrmbik 3975
-VPCMPUDZrmi 3976
-VPCMPUDZrmik 3977
-VPCMPUDZrri 3978
-VPCMPUDZrrik 3979
-VPCMPUQZ 3980
-VPCMPUQZrmbi 3981
-VPCMPUQZrmbik 3982
-VPCMPUQZrmi 3983
-VPCMPUQZrmik 3984
-VPCMPUQZrri 3985
-VPCMPUQZrrik 3986
-VPCMPUWZ 3987
-VPCMPUWZrmi 3988
-VPCMPUWZrmik 3989
-VPCMPUWZrri 3990
-VPCMPUWZrrik 3991
-VPCMPWZ 3992
-VPCMPWZrmi 3993
-VPCMPWZrmik 3994
-VPCMPWZrri 3995
-VPCMPWZrrik 3996
-VPCOMBmi 3997
-VPCOMBri 3998
-VPCOMDmi 3999
-VPCOMDri 4000
-VPCOMPRESSBZ 4001
-VPCOMPRESSBZmr 4002
-VPCOMPRESSBZmrk 4003
-VPCOMPRESSBZrr 4004
-VPCOMPRESSBZrrk 4005
-VPCOMPRESSBZrrkz 4006
-VPCOMPRESSDZ 4007
-VPCOMPRESSDZmr 4008
-VPCOMPRESSDZmrk 4009
-VPCOMPRESSDZrr 4010
-VPCOMPRESSDZrrk 4011
-VPCOMPRESSDZrrkz 4012
-VPCOMPRESSQZ 4013
-VPCOMPRESSQZmr 4014
-VPCOMPRESSQZmrk 4015
-VPCOMPRESSQZrr 4016
-VPCOMPRESSQZrrk 4017
-VPCOMPRESSQZrrkz 4018
-VPCOMPRESSWZ 4019
-VPCOMPRESSWZmr 4020
-VPCOMPRESSWZmrk 4021
-VPCOMPRESSWZrr 4022
-VPCOMPRESSWZrrk 4023
-VPCOMPRESSWZrrkz 4024
-VPCOMQmi 4025
-VPCOMQri 4026
-VPCOMUBmi 4027
-VPCOMUBri 4028
-VPCOMUDmi 4029
-VPCOMUDri 4030
-VPCOMUQmi 4031
-VPCOMUQri 4032
-VPCOMUWmi 4033
-VPCOMUWri 4034
-VPCOMWmi 4035
-VPCOMWri 4036
-VPCONFLICTDZ 4037
-VPCONFLICTDZrm 4038
-VPCONFLICTDZrmb 4039
-VPCONFLICTDZrmbk 4040
-VPCONFLICTDZrmbkz 4041
-VPCONFLICTDZrmk 4042
-VPCONFLICTDZrmkz 4043
-VPCONFLICTDZrr 4044
-VPCONFLICTDZrrk 4045
-VPCONFLICTDZrrkz 4046
-VPCONFLICTQZ 4047
-VPCONFLICTQZrm 4048
-VPCONFLICTQZrmb 4049
-VPCONFLICTQZrmbk 4050
-VPCONFLICTQZrmbkz 4051
-VPCONFLICTQZrmk 4052
-VPCONFLICTQZrmkz 4053
-VPCONFLICTQZrr 4054
-VPCONFLICTQZrrk 4055
-VPCONFLICTQZrrkz 4056
-VPDPBSSDSYrm 4057
-VPDPBSSDSYrr 4058
-VPDPBSSDSZ 4059
-VPDPBSSDSZrm 4060
-VPDPBSSDSZrmb 4061
-VPDPBSSDSZrmbk 4062
-VPDPBSSDSZrmbkz 4063
-VPDPBSSDSZrmk 4064
-VPDPBSSDSZrmkz 4065
-VPDPBSSDSZrr 4066
-VPDPBSSDSZrrk 4067
-VPDPBSSDSZrrkz 4068
-VPDPBSSDSrm 4069
-VPDPBSSDSrr 4070
-VPDPBSSDYrm 4071
-VPDPBSSDYrr 4072
-VPDPBSSDZ 4073
-VPDPBSSDZrm 4074
-VPDPBSSDZrmb 4075
-VPDPBSSDZrmbk 4076
-VPDPBSSDZrmbkz 4077
-VPDPBSSDZrmk 4078
-VPDPBSSDZrmkz 4079
-VPDPBSSDZrr 4080
-VPDPBSSDZrrk 4081
-VPDPBSSDZrrkz 4082
-VPDPBSSDrm 4083
-VPDPBSSDrr 4084
-VPDPBSUDSYrm 4085
-VPDPBSUDSYrr 4086
-VPDPBSUDSZ 4087
-VPDPBSUDSZrm 4088
-VPDPBSUDSZrmb 4089
-VPDPBSUDSZrmbk 4090
-VPDPBSUDSZrmbkz 4091
-VPDPBSUDSZrmk 4092
-VPDPBSUDSZrmkz 4093
-VPDPBSUDSZrr 4094
-VPDPBSUDSZrrk 4095
-VPDPBSUDSZrrkz 4096
-VPDPBSUDSrm 4097
-VPDPBSUDSrr 4098
-VPDPBSUDYrm 4099
-VPDPBSUDYrr 4100
-VPDPBSUDZ 4101
-VPDPBSUDZrm 4102
-VPDPBSUDZrmb 4103
-VPDPBSUDZrmbk 4104
-VPDPBSUDZrmbkz 4105
-VPDPBSUDZrmk 4106
-VPDPBSUDZrmkz 4107
-VPDPBSUDZrr 4108
-VPDPBSUDZrrk 4109
-VPDPBSUDZrrkz 4110
-VPDPBSUDrm 4111
-VPDPBSUDrr 4112
-VPDPBUSDSYrm 4113
-VPDPBUSDSYrr 4114
-VPDPBUSDSZ 4115
-VPDPBUSDSZrm 4116
-VPDPBUSDSZrmb 4117
-VPDPBUSDSZrmbk 4118
-VPDPBUSDSZrmbkz 4119
-VPDPBUSDSZrmk 4120
-VPDPBUSDSZrmkz 4121
-VPDPBUSDSZrr 4122
-VPDPBUSDSZrrk 4123
-VPDPBUSDSZrrkz 4124
-VPDPBUSDSrm 4125
-VPDPBUSDSrr 4126
-VPDPBUSDYrm 4127
-VPDPBUSDYrr 4128
-VPDPBUSDZ 4129
-VPDPBUSDZrm 4130
-VPDPBUSDZrmb 4131
-VPDPBUSDZrmbk 4132
-VPDPBUSDZrmbkz 4133
-VPDPBUSDZrmk 4134
-VPDPBUSDZrmkz 4135
-VPDPBUSDZrr 4136
-VPDPBUSDZrrk 4137
-VPDPBUSDZrrkz 4138
-VPDPBUSDrm 4139
-VPDPBUSDrr 4140
-VPDPBUUDSYrm 4141
-VPDPBUUDSYrr 4142
-VPDPBUUDSZ 4143
-VPDPBUUDSZrm 4144
-VPDPBUUDSZrmb 4145
-VPDPBUUDSZrmbk 4146
-VPDPBUUDSZrmbkz 4147
-VPDPBUUDSZrmk 4148
-VPDPBUUDSZrmkz 4149
-VPDPBUUDSZrr 4150
-VPDPBUUDSZrrk 4151
-VPDPBUUDSZrrkz 4152
-VPDPBUUDSrm 4153
-VPDPBUUDSrr 4154
-VPDPBUUDYrm 4155
-VPDPBUUDYrr 4156
-VPDPBUUDZ 4157
-VPDPBUUDZrm 4158
-VPDPBUUDZrmb 4159
-VPDPBUUDZrmbk 4160
-VPDPBUUDZrmbkz 4161
-VPDPBUUDZrmk 4162
-VPDPBUUDZrmkz 4163
-VPDPBUUDZrr 4164
-VPDPBUUDZrrk 4165
-VPDPBUUDZrrkz 4166
-VPDPBUUDrm 4167
-VPDPBUUDrr 4168
-VPDPWSSDSYrm 4169
-VPDPWSSDSYrr 4170
-VPDPWSSDSZ 4171
-VPDPWSSDSZrm 4172
-VPDPWSSDSZrmb 4173
-VPDPWSSDSZrmbk 4174
-VPDPWSSDSZrmbkz 4175
-VPDPWSSDSZrmk 4176
-VPDPWSSDSZrmkz 4177
-VPDPWSSDSZrr 4178
-VPDPWSSDSZrrk 4179
-VPDPWSSDSZrrkz 4180
-VPDPWSSDSrm 4181
-VPDPWSSDSrr 4182
-VPDPWSSDYrm 4183
-VPDPWSSDYrr 4184
-VPDPWSSDZ 4185
-VPDPWSSDZrm 4186
-VPDPWSSDZrmb 4187
-VPDPWSSDZrmbk 4188
-VPDPWSSDZrmbkz 4189
-VPDPWSSDZrmk 4190
-VPDPWSSDZrmkz 4191
-VPDPWSSDZrr 4192
-VPDPWSSDZrrk 4193
-VPDPWSSDZrrkz 4194
-VPDPWSSDrm 4195
-VPDPWSSDrr 4196
-VPDPWSUDSYrm 4197
-VPDPWSUDSYrr 4198
-VPDPWSUDSZ 4199
-VPDPWSUDSZrm 4200
-VPDPWSUDSZrmb 4201
-VPDPWSUDSZrmbk 4202
-VPDPWSUDSZrmbkz 4203
-VPDPWSUDSZrmk 4204
-VPDPWSUDSZrmkz 4205
-VPDPWSUDSZrr 4206
-VPDPWSUDSZrrk 4207
-VPDPWSUDSZrrkz 4208
-VPDPWSUDSrm 4209
-VPDPWSUDSrr 4210
-VPDPWSUDYrm 4211
-VPDPWSUDYrr 4212
-VPDPWSUDZ 4213
-VPDPWSUDZrm 4214
-VPDPWSUDZrmb 4215
-VPDPWSUDZrmbk 4216
-VPDPWSUDZrmbkz 4217
-VPDPWSUDZrmk 4218
-VPDPWSUDZrmkz 4219
-VPDPWSUDZrr 4220
-VPDPWSUDZrrk 4221
-VPDPWSUDZrrkz 4222
-VPDPWSUDrm 4223
-VPDPWSUDrr 4224
-VPDPWUSDSYrm 4225
-VPDPWUSDSYrr 4226
-VPDPWUSDSZ 4227
-VPDPWUSDSZrm 4228
-VPDPWUSDSZrmb 4229
-VPDPWUSDSZrmbk 4230
-VPDPWUSDSZrmbkz 4231
-VPDPWUSDSZrmk 4232
-VPDPWUSDSZrmkz 4233
-VPDPWUSDSZrr 4234
-VPDPWUSDSZrrk 4235
-VPDPWUSDSZrrkz 4236
-VPDPWUSDSrm 4237
-VPDPWUSDSrr 4238
-VPDPWUSDYrm 4239
-VPDPWUSDYrr 4240
-VPDPWUSDZ 4241
-VPDPWUSDZrm 4242
-VPDPWUSDZrmb 4243
-VPDPWUSDZrmbk 4244
-VPDPWUSDZrmbkz 4245
-VPDPWUSDZrmk 4246
-VPDPWUSDZrmkz 4247
-VPDPWUSDZrr 4248
-VPDPWUSDZrrk 4249
-VPDPWUSDZrrkz 4250
-VPDPWUSDrm 4251
-VPDPWUSDrr 4252
-VPDPWUUDSYrm 4253
-VPDPWUUDSYrr 4254
-VPDPWUUDSZ 4255
-VPDPWUUDSZrm 4256
-VPDPWUUDSZrmb 4257
-VPDPWUUDSZrmbk 4258
-VPDPWUUDSZrmbkz 4259
-VPDPWUUDSZrmk 4260
-VPDPWUUDSZrmkz 4261
-VPDPWUUDSZrr 4262
-VPDPWUUDSZrrk 4263
-VPDPWUUDSZrrkz 4264
-VPDPWUUDSrm 4265
-VPDPWUUDSrr 4266
-VPDPWUUDYrm 4267
-VPDPWUUDYrr 4268
-VPDPWUUDZ 4269
-VPDPWUUDZrm 4270
-VPDPWUUDZrmb 4271
-VPDPWUUDZrmbk 4272
-VPDPWUUDZrmbkz 4273
-VPDPWUUDZrmk 4274
-VPDPWUUDZrmkz 4275
-VPDPWUUDZrr 4276
-VPDPWUUDZrrk 4277
-VPDPWUUDZrrkz 4278
-VPDPWUUDrm 4279
-VPDPWUUDrr 4280
-VPERM 4281
-VPERMBZ 4282
-VPERMBZrm 4283
-VPERMBZrmk 4284
-VPERMBZrmkz 4285
-VPERMBZrr 4286
-VPERMBZrrk 4287
-VPERMBZrrkz 4288
-VPERMDYrm 4289
-VPERMDYrr 4290
-VPERMDZ 4291
-VPERMDZrm 4292
-VPERMDZrmb 4293
-VPERMDZrmbk 4294
-VPERMDZrmbkz 4295
-VPERMDZrmk 4296
-VPERMDZrmkz 4297
-VPERMDZrr 4298
-VPERMDZrrk 4299
-VPERMDZrrkz 4300
-VPERMI 4301
-VPERMIL 4302
-VPERMILPDYmi 4303
-VPERMILPDYri 4304
-VPERMILPDYrm 4305
-VPERMILPDYrr 4306
-VPERMILPDZ 4307
-VPERMILPDZmbi 4308
-VPERMILPDZmbik 4309
-VPERMILPDZmbikz 4310
-VPERMILPDZmi 4311
-VPERMILPDZmik 4312
-VPERMILPDZmikz 4313
-VPERMILPDZri 4314
-VPERMILPDZrik 4315
-VPERMILPDZrikz 4316
-VPERMILPDZrm 4317
-VPERMILPDZrmb 4318
-VPERMILPDZrmbk 4319
-VPERMILPDZrmbkz 4320
-VPERMILPDZrmk 4321
-VPERMILPDZrmkz 4322
-VPERMILPDZrr 4323
-VPERMILPDZrrk 4324
-VPERMILPDZrrkz 4325
-VPERMILPDmi 4326
-VPERMILPDri 4327
-VPERMILPDrm 4328
-VPERMILPDrr 4329
-VPERMILPSYmi 4330
-VPERMILPSYri 4331
-VPERMILPSYrm 4332
-VPERMILPSYrr 4333
-VPERMILPSZ 4334
-VPERMILPSZmbi 4335
-VPERMILPSZmbik 4336
-VPERMILPSZmbikz 4337
-VPERMILPSZmi 4338
-VPERMILPSZmik 4339
-VPERMILPSZmikz 4340
-VPERMILPSZri 4341
-VPERMILPSZrik 4342
-VPERMILPSZrikz 4343
-VPERMILPSZrm 4344
-VPERMILPSZrmb 4345
-VPERMILPSZrmbk 4346
-VPERMILPSZrmbkz 4347
-VPERMILPSZrmk 4348
-VPERMILPSZrmkz 4349
-VPERMILPSZrr 4350
-VPERMILPSZrrk 4351
-VPERMILPSZrrkz 4352
-VPERMILPSmi 4353
-VPERMILPSri 4354
-VPERMILPSrm 4355
-VPERMILPSrr 4356
-VPERMPDYmi 4357
-VPERMPDYri 4358
-VPERMPDZ 4359
-VPERMPDZmbi 4360
-VPERMPDZmbik 4361
-VPERMPDZmbikz 4362
-VPERMPDZmi 4363
-VPERMPDZmik 4364
-VPERMPDZmikz 4365
-VPERMPDZri 4366
-VPERMPDZrik 4367
-VPERMPDZrikz 4368
-VPERMPDZrm 4369
-VPERMPDZrmb 4370
-VPERMPDZrmbk 4371
-VPERMPDZrmbkz 4372
-VPERMPDZrmk 4373
-VPERMPDZrmkz 4374
-VPERMPDZrr 4375
-VPERMPDZrrk 4376
-VPERMPDZrrkz 4377
-VPERMPSYrm 4378
-VPERMPSYrr 4379
-VPERMPSZ 4380
-VPERMPSZrm 4381
-VPERMPSZrmb 4382
-VPERMPSZrmbk 4383
-VPERMPSZrmbkz 4384
-VPERMPSZrmk 4385
-VPERMPSZrmkz 4386
-VPERMPSZrr 4387
-VPERMPSZrrk 4388
-VPERMPSZrrkz 4389
-VPERMQYmi 4390
-VPERMQYri 4391
-VPERMQZ 4392
-VPERMQZmbi 4393
-VPERMQZmbik 4394
-VPERMQZmbikz 4395
-VPERMQZmi 4396
-VPERMQZmik 4397
-VPERMQZmikz 4398
-VPERMQZri 4399
-VPERMQZrik 4400
-VPERMQZrikz 4401
-VPERMQZrm 4402
-VPERMQZrmb 4403
-VPERMQZrmbk 4404
-VPERMQZrmbkz 4405
-VPERMQZrmk 4406
-VPERMQZrmkz 4407
-VPERMQZrr 4408
-VPERMQZrrk 4409
-VPERMQZrrkz 4410
-VPERMT 4411
-VPERMWZ 4412
-VPERMWZrm 4413
-VPERMWZrmk 4414
-VPERMWZrmkz 4415
-VPERMWZrr 4416
-VPERMWZrrk 4417
-VPERMWZrrkz 4418
-VPEXPANDBZ 4419
-VPEXPANDBZrm 4420
-VPEXPANDBZrmk 4421
-VPEXPANDBZrmkz 4422
-VPEXPANDBZrr 4423
-VPEXPANDBZrrk 4424
-VPEXPANDBZrrkz 4425
-VPEXPANDDZ 4426
-VPEXPANDDZrm 4427
-VPEXPANDDZrmk 4428
-VPEXPANDDZrmkz 4429
-VPEXPANDDZrr 4430
-VPEXPANDDZrrk 4431
-VPEXPANDDZrrkz 4432
-VPEXPANDQZ 4433
-VPEXPANDQZrm 4434
-VPEXPANDQZrmk 4435
-VPEXPANDQZrmkz 4436
-VPEXPANDQZrr 4437
-VPEXPANDQZrrk 4438
-VPEXPANDQZrrkz 4439
-VPEXPANDWZ 4440
-VPEXPANDWZrm 4441
-VPEXPANDWZrmk 4442
-VPEXPANDWZrmkz 4443
-VPEXPANDWZrr 4444
-VPEXPANDWZrrk 4445
-VPEXPANDWZrrkz 4446
-VPEXTRBZmri 4447
-VPEXTRBZrri 4448
-VPEXTRBmri 4449
-VPEXTRBrri 4450
-VPEXTRDZmri 4451
-VPEXTRDZrri 4452
-VPEXTRDmri 4453
-VPEXTRDrri 4454
-VPEXTRQZmri 4455
-VPEXTRQZrri 4456
-VPEXTRQmri 4457
-VPEXTRQrri 4458
-VPEXTRWZmri 4459
-VPEXTRWZrri 4460
-VPEXTRWZrri_REV 4461
-VPEXTRWmri 4462
-VPEXTRWrri 4463
-VPEXTRWrri_REV 4464
-VPGATHERDDYrm 4465
-VPGATHERDDZ 4466
-VPGATHERDDZrm 4467
-VPGATHERDDrm 4468
-VPGATHERDQYrm 4469
-VPGATHERDQZ 4470
-VPGATHERDQZrm 4471
-VPGATHERDQrm 4472
-VPGATHERQDYrm 4473
-VPGATHERQDZ 4474
-VPGATHERQDZrm 4475
-VPGATHERQDrm 4476
-VPGATHERQQYrm 4477
-VPGATHERQQZ 4478
-VPGATHERQQZrm 4479
-VPGATHERQQrm 4480
-VPHADDBDrm 4481
-VPHADDBDrr 4482
-VPHADDBQrm 4483
-VPHADDBQrr 4484
-VPHADDBWrm 4485
-VPHADDBWrr 4486
-VPHADDDQrm 4487
-VPHADDDQrr 4488
-VPHADDDYrm 4489
-VPHADDDYrr 4490
-VPHADDDrm 4491
-VPHADDDrr 4492
-VPHADDSWYrm 4493
-VPHADDSWYrr 4494
-VPHADDSWrm 4495
-VPHADDSWrr 4496
-VPHADDUBDrm 4497
-VPHADDUBDrr 4498
-VPHADDUBQrm 4499
-VPHADDUBQrr 4500
-VPHADDUBWrm 4501
-VPHADDUBWrr 4502
-VPHADDUDQrm 4503
-VPHADDUDQrr 4504
-VPHADDUWDrm 4505
-VPHADDUWDrr 4506
-VPHADDUWQrm 4507
-VPHADDUWQrr 4508
-VPHADDWDrm 4509
-VPHADDWDrr 4510
-VPHADDWQrm 4511
-VPHADDWQrr 4512
-VPHADDWYrm 4513
-VPHADDWYrr 4514
-VPHADDWrm 4515
-VPHADDWrr 4516
-VPHMINPOSUWrm 4517
-VPHMINPOSUWrr 4518
-VPHSUBBWrm 4519
-VPHSUBBWrr 4520
-VPHSUBDQrm 4521
-VPHSUBDQrr 4522
-VPHSUBDYrm 4523
-VPHSUBDYrr 4524
-VPHSUBDrm 4525
-VPHSUBDrr 4526
-VPHSUBSWYrm 4527
-VPHSUBSWYrr 4528
-VPHSUBSWrm 4529
-VPHSUBSWrr 4530
-VPHSUBWDrm 4531
-VPHSUBWDrr 4532
-VPHSUBWYrm 4533
-VPHSUBWYrr 4534
-VPHSUBWrm 4535
-VPHSUBWrr 4536
-VPINSRBZrmi 4537
-VPINSRBZrri 4538
-VPINSRBrmi 4539
-VPINSRBrri 4540
-VPINSRDZrmi 4541
-VPINSRDZrri 4542
-VPINSRDrmi 4543
-VPINSRDrri 4544
-VPINSRQZrmi 4545
-VPINSRQZrri 4546
-VPINSRQrmi 4547
-VPINSRQrri 4548
-VPINSRWZrmi 4549
-VPINSRWZrri 4550
-VPINSRWrmi 4551
-VPINSRWrri 4552
-VPLZCNTDZ 4553
-VPLZCNTDZrm 4554
-VPLZCNTDZrmb 4555
-VPLZCNTDZrmbk 4556
-VPLZCNTDZrmbkz 4557
-VPLZCNTDZrmk 4558
-VPLZCNTDZrmkz 4559
-VPLZCNTDZrr 4560
-VPLZCNTDZrrk 4561
-VPLZCNTDZrrkz 4562
-VPLZCNTQZ 4563
-VPLZCNTQZrm 4564
-VPLZCNTQZrmb 4565
-VPLZCNTQZrmbk 4566
-VPLZCNTQZrmbkz 4567
-VPLZCNTQZrmk 4568
-VPLZCNTQZrmkz 4569
-VPLZCNTQZrr 4570
-VPLZCNTQZrrk 4571
-VPLZCNTQZrrkz 4572
-VPMACSDDrm 4573
-VPMACSDDrr 4574
-VPMACSDQHrm 4575
-VPMACSDQHrr 4576
-VPMACSDQLrm 4577
-VPMACSDQLrr 4578
-VPMACSSDDrm 4579
-VPMACSSDDrr 4580
-VPMACSSDQHrm 4581
-VPMACSSDQHrr 4582
-VPMACSSDQLrm 4583
-VPMACSSDQLrr 4584
-VPMACSSWDrm 4585
-VPMACSSWDrr 4586
-VPMACSSWWrm 4587
-VPMACSSWWrr 4588
-VPMACSWDrm 4589
-VPMACSWDrr 4590
-VPMACSWWrm 4591
-VPMACSWWrr 4592
-VPMADCSSWDrm 4593
-VPMADCSSWDrr 4594
-VPMADCSWDrm 4595
-VPMADCSWDrr 4596
-VPMADD 4597
-VPMADDUBSWYrm 4598
-VPMADDUBSWYrr 4599
-VPMADDUBSWZ 4600
-VPMADDUBSWZrm 4601
-VPMADDUBSWZrmk 4602
-VPMADDUBSWZrmkz 4603
-VPMADDUBSWZrr 4604
-VPMADDUBSWZrrk 4605
-VPMADDUBSWZrrkz 4606
-VPMADDUBSWrm 4607
-VPMADDUBSWrr 4608
-VPMADDWDYrm 4609
-VPMADDWDYrr 4610
-VPMADDWDZ 4611
-VPMADDWDZrm 4612
-VPMADDWDZrmk 4613
-VPMADDWDZrmkz 4614
-VPMADDWDZrr 4615
-VPMADDWDZrrk 4616
-VPMADDWDZrrkz 4617
-VPMADDWDrm 4618
-VPMADDWDrr 4619
-VPMASKMOVDYmr 4620
-VPMASKMOVDYrm 4621
-VPMASKMOVDmr 4622
-VPMASKMOVDrm 4623
-VPMASKMOVQYmr 4624
-VPMASKMOVQYrm 4625
-VPMASKMOVQmr 4626
-VPMASKMOVQrm 4627
-VPMAXSBYrm 4628
-VPMAXSBYrr 4629
-VPMAXSBZ 4630
-VPMAXSBZrm 4631
-VPMAXSBZrmk 4632
-VPMAXSBZrmkz 4633
-VPMAXSBZrr 4634
-VPMAXSBZrrk 4635
-VPMAXSBZrrkz 4636
-VPMAXSBrm 4637
-VPMAXSBrr 4638
-VPMAXSDYrm 4639
-VPMAXSDYrr 4640
-VPMAXSDZ 4641
-VPMAXSDZrm 4642
-VPMAXSDZrmb 4643
-VPMAXSDZrmbk 4644
-VPMAXSDZrmbkz 4645
-VPMAXSDZrmk 4646
-VPMAXSDZrmkz 4647
-VPMAXSDZrr 4648
-VPMAXSDZrrk 4649
-VPMAXSDZrrkz 4650
-VPMAXSDrm 4651
-VPMAXSDrr 4652
-VPMAXSQZ 4653
-VPMAXSQZrm 4654
-VPMAXSQZrmb 4655
-VPMAXSQZrmbk 4656
-VPMAXSQZrmbkz 4657
-VPMAXSQZrmk 4658
-VPMAXSQZrmkz 4659
-VPMAXSQZrr 4660
-VPMAXSQZrrk 4661
-VPMAXSQZrrkz 4662
-VPMAXSWYrm 4663
-VPMAXSWYrr 4664
-VPMAXSWZ 4665
-VPMAXSWZrm 4666
-VPMAXSWZrmk 4667
-VPMAXSWZrmkz 4668
-VPMAXSWZrr 4669
-VPMAXSWZrrk 4670
-VPMAXSWZrrkz 4671
-VPMAXSWrm 4672
-VPMAXSWrr 4673
-VPMAXUBYrm 4674
-VPMAXUBYrr 4675
-VPMAXUBZ 4676
-VPMAXUBZrm 4677
-VPMAXUBZrmk 4678
-VPMAXUBZrmkz 4679
-VPMAXUBZrr 4680
-VPMAXUBZrrk 4681
-VPMAXUBZrrkz 4682
-VPMAXUBrm 4683
-VPMAXUBrr 4684
-VPMAXUDYrm 4685
-VPMAXUDYrr 4686
-VPMAXUDZ 4687
-VPMAXUDZrm 4688
-VPMAXUDZrmb 4689
-VPMAXUDZrmbk 4690
-VPMAXUDZrmbkz 4691
-VPMAXUDZrmk 4692
-VPMAXUDZrmkz 4693
-VPMAXUDZrr 4694
-VPMAXUDZrrk 4695
-VPMAXUDZrrkz 4696
-VPMAXUDrm 4697
-VPMAXUDrr 4698
-VPMAXUQZ 4699
-VPMAXUQZrm 4700
-VPMAXUQZrmb 4701
-VPMAXUQZrmbk 4702
-VPMAXUQZrmbkz 4703
-VPMAXUQZrmk 4704
-VPMAXUQZrmkz 4705
-VPMAXUQZrr 4706
-VPMAXUQZrrk 4707
-VPMAXUQZrrkz 4708
-VPMAXUWYrm 4709
-VPMAXUWYrr 4710
-VPMAXUWZ 4711
-VPMAXUWZrm 4712
-VPMAXUWZrmk 4713
-VPMAXUWZrmkz 4714
-VPMAXUWZrr 4715
-VPMAXUWZrrk 4716
-VPMAXUWZrrkz 4717
-VPMAXUWrm 4718
-VPMAXUWrr 4719
-VPMINSBYrm 4720
-VPMINSBYrr 4721
-VPMINSBZ 4722
-VPMINSBZrm 4723
-VPMINSBZrmk 4724
-VPMINSBZrmkz 4725
-VPMINSBZrr 4726
-VPMINSBZrrk 4727
-VPMINSBZrrkz 4728
-VPMINSBrm 4729
-VPMINSBrr 4730
-VPMINSDYrm 4731
-VPMINSDYrr 4732
-VPMINSDZ 4733
-VPMINSDZrm 4734
-VPMINSDZrmb 4735
-VPMINSDZrmbk 4736
-VPMINSDZrmbkz 4737
-VPMINSDZrmk 4738
-VPMINSDZrmkz 4739
-VPMINSDZrr 4740
-VPMINSDZrrk 4741
-VPMINSDZrrkz 4742
-VPMINSDrm 4743
-VPMINSDrr 4744
-VPMINSQZ 4745
-VPMINSQZrm 4746
-VPMINSQZrmb 4747
-VPMINSQZrmbk 4748
-VPMINSQZrmbkz 4749
-VPMINSQZrmk 4750
-VPMINSQZrmkz 4751
-VPMINSQZrr 4752
-VPMINSQZrrk 4753
-VPMINSQZrrkz 4754
-VPMINSWYrm 4755
-VPMINSWYrr 4756
-VPMINSWZ 4757
-VPMINSWZrm 4758
-VPMINSWZrmk 4759
-VPMINSWZrmkz 4760
-VPMINSWZrr 4761
-VPMINSWZrrk 4762
-VPMINSWZrrkz 4763
-VPMINSWrm 4764
-VPMINSWrr 4765
-VPMINUBYrm 4766
-VPMINUBYrr 4767
-VPMINUBZ 4768
-VPMINUBZrm 4769
-VPMINUBZrmk 4770
-VPMINUBZrmkz 4771
-VPMINUBZrr 4772
-VPMINUBZrrk 4773
-VPMINUBZrrkz 4774
-VPMINUBrm 4775
-VPMINUBrr 4776
-VPMINUDYrm 4777
-VPMINUDYrr 4778
-VPMINUDZ 4779
-VPMINUDZrm 4780
-VPMINUDZrmb 4781
-VPMINUDZrmbk 4782
-VPMINUDZrmbkz 4783
-VPMINUDZrmk 4784
-VPMINUDZrmkz 4785
-VPMINUDZrr 4786
-VPMINUDZrrk 4787
-VPMINUDZrrkz 4788
-VPMINUDrm 4789
-VPMINUDrr 4790
-VPMINUQZ 4791
-VPMINUQZrm 4792
-VPMINUQZrmb 4793
-VPMINUQZrmbk 4794
-VPMINUQZrmbkz 4795
-VPMINUQZrmk 4796
-VPMINUQZrmkz 4797
-VPMINUQZrr 4798
-VPMINUQZrrk 4799
-VPMINUQZrrkz 4800
-VPMINUWYrm 4801
-VPMINUWYrr 4802
-VPMINUWZ 4803
-VPMINUWZrm 4804
-VPMINUWZrmk 4805
-VPMINUWZrmkz 4806
-VPMINUWZrr 4807
-VPMINUWZrrk 4808
-VPMINUWZrrkz 4809
-VPMINUWrm 4810
-VPMINUWrr 4811
-VPMOVB 4812
-VPMOVD 4813
-VPMOVDBZ 4814
-VPMOVDBZmr 4815
-VPMOVDBZmrk 4816
-VPMOVDBZrr 4817
-VPMOVDBZrrk 4818
-VPMOVDBZrrkz 4819
-VPMOVDWZ 4820
-VPMOVDWZmr 4821
-VPMOVDWZmrk 4822
-VPMOVDWZrr 4823
-VPMOVDWZrrk 4824
-VPMOVDWZrrkz 4825
-VPMOVM 4826
-VPMOVMSKBYrr 4827
-VPMOVMSKBrr 4828
-VPMOVQ 4829
-VPMOVQBZ 4830
-VPMOVQBZmr 4831
-VPMOVQBZmrk 4832
-VPMOVQBZrr 4833
-VPMOVQBZrrk 4834
-VPMOVQBZrrkz 4835
-VPMOVQDZ 4836
-VPMOVQDZmr 4837
-VPMOVQDZmrk 4838
-VPMOVQDZrr 4839
-VPMOVQDZrrk 4840
-VPMOVQDZrrkz 4841
-VPMOVQWZ 4842
-VPMOVQWZmr 4843
-VPMOVQWZmrk 4844
-VPMOVQWZrr 4845
-VPMOVQWZrrk 4846
-VPMOVQWZrrkz 4847
-VPMOVSDBZ 4848
-VPMOVSDBZmr 4849
-VPMOVSDBZmrk 4850
-VPMOVSDBZrr 4851
-VPMOVSDBZrrk 4852
-VPMOVSDBZrrkz 4853
-VPMOVSDWZ 4854
-VPMOVSDWZmr 4855
-VPMOVSDWZmrk 4856
-VPMOVSDWZrr 4857
-VPMOVSDWZrrk 4858
-VPMOVSDWZrrkz 4859
-VPMOVSQBZ 4860
-VPMOVSQBZmr 4861
-VPMOVSQBZmrk 4862
-VPMOVSQBZrr 4863
-VPMOVSQBZrrk 4864
-VPMOVSQBZrrkz 4865
-VPMOVSQDZ 4866
-VPMOVSQDZmr 4867
-VPMOVSQDZmrk 4868
-VPMOVSQDZrr 4869
-VPMOVSQDZrrk 4870
-VPMOVSQDZrrkz 4871
-VPMOVSQWZ 4872
-VPMOVSQWZmr 4873
-VPMOVSQWZmrk 4874
-VPMOVSQWZrr 4875
-VPMOVSQWZrrk 4876
-VPMOVSQWZrrkz 4877
-VPMOVSWBZ 4878
-VPMOVSWBZmr 4879
-VPMOVSWBZmrk 4880
-VPMOVSWBZrr 4881
-VPMOVSWBZrrk 4882
-VPMOVSWBZrrkz 4883
-VPMOVSXBDYrm 4884
-VPMOVSXBDYrr 4885
-VPMOVSXBDZ 4886
-VPMOVSXBDZrm 4887
-VPMOVSXBDZrmk 4888
-VPMOVSXBDZrmkz 4889
-VPMOVSXBDZrr 4890
-VPMOVSXBDZrrk 4891
-VPMOVSXBDZrrkz 4892
-VPMOVSXBDrm 4893
-VPMOVSXBDrr 4894
-VPMOVSXBQYrm 4895
-VPMOVSXBQYrr 4896
-VPMOVSXBQZ 4897
-VPMOVSXBQZrm 4898
-VPMOVSXBQZrmk 4899
-VPMOVSXBQZrmkz 4900
-VPMOVSXBQZrr 4901
-VPMOVSXBQZrrk 4902
-VPMOVSXBQZrrkz 4903
-VPMOVSXBQrm 4904
-VPMOVSXBQrr 4905
-VPMOVSXBWYrm 4906
-VPMOVSXBWYrr 4907
-VPMOVSXBWZ 4908
-VPMOVSXBWZrm 4909
-VPMOVSXBWZrmk 4910
-VPMOVSXBWZrmkz 4911
-VPMOVSXBWZrr 4912
-VPMOVSXBWZrrk 4913
-VPMOVSXBWZrrkz 4914
-VPMOVSXBWrm 4915
-VPMOVSXBWrr 4916
-VPMOVSXDQYrm 4917
-VPMOVSXDQYrr 4918
-VPMOVSXDQZ 4919
-VPMOVSXDQZrm 4920
-VPMOVSXDQZrmk 4921
-VPMOVSXDQZrmkz 4922
-VPMOVSXDQZrr 4923
-VPMOVSXDQZrrk 4924
-VPMOVSXDQZrrkz 4925
-VPMOVSXDQrm 4926
-VPMOVSXDQrr 4927
-VPMOVSXWDYrm 4928
-VPMOVSXWDYrr 4929
-VPMOVSXWDZ 4930
-VPMOVSXWDZrm 4931
-VPMOVSXWDZrmk 4932
-VPMOVSXWDZrmkz 4933
-VPMOVSXWDZrr 4934
-VPMOVSXWDZrrk 4935
-VPMOVSXWDZrrkz 4936
-VPMOVSXWDrm 4937
-VPMOVSXWDrr 4938
-VPMOVSXWQYrm 4939
-VPMOVSXWQYrr 4940
-VPMOVSXWQZ 4941
-VPMOVSXWQZrm 4942
-VPMOVSXWQZrmk 4943
-VPMOVSXWQZrmkz 4944
-VPMOVSXWQZrr 4945
-VPMOVSXWQZrrk 4946
-VPMOVSXWQZrrkz 4947
-VPMOVSXWQrm 4948
-VPMOVSXWQrr 4949
-VPMOVUSDBZ 4950
-VPMOVUSDBZmr 4951
-VPMOVUSDBZmrk 4952
-VPMOVUSDBZrr 4953
-VPMOVUSDBZrrk 4954
-VPMOVUSDBZrrkz 4955
-VPMOVUSDWZ 4956
-VPMOVUSDWZmr 4957
-VPMOVUSDWZmrk 4958
-VPMOVUSDWZrr 4959
-VPMOVUSDWZrrk 4960
-VPMOVUSDWZrrkz 4961
-VPMOVUSQBZ 4962
-VPMOVUSQBZmr 4963
-VPMOVUSQBZmrk 4964
-VPMOVUSQBZrr 4965
-VPMOVUSQBZrrk 4966
-VPMOVUSQBZrrkz 4967
-VPMOVUSQDZ 4968
-VPMOVUSQDZmr 4969
-VPMOVUSQDZmrk 4970
-VPMOVUSQDZrr 4971
-VPMOVUSQDZrrk 4972
-VPMOVUSQDZrrkz 4973
-VPMOVUSQWZ 4974
-VPMOVUSQWZmr 4975
-VPMOVUSQWZmrk 4976
-VPMOVUSQWZrr 4977
-VPMOVUSQWZrrk 4978
-VPMOVUSQWZrrkz 4979
-VPMOVUSWBZ 4980
-VPMOVUSWBZmr 4981
-VPMOVUSWBZmrk 4982
-VPMOVUSWBZrr 4983
-VPMOVUSWBZrrk 4984
-VPMOVUSWBZrrkz 4985
-VPMOVW 4986
-VPMOVWBZ 4987
-VPMOVWBZmr 4988
-VPMOVWBZmrk 4989
-VPMOVWBZrr 4990
-VPMOVWBZrrk 4991
-VPMOVWBZrrkz 4992
-VPMOVZXBDYrm 4993
-VPMOVZXBDYrr 4994
-VPMOVZXBDZ 4995
-VPMOVZXBDZrm 4996
-VPMOVZXBDZrmk 4997
-VPMOVZXBDZrmkz 4998
-VPMOVZXBDZrr 4999
-VPMOVZXBDZrrk 5000
-VPMOVZXBDZrrkz 5001
-VPMOVZXBDrm 5002
-VPMOVZXBDrr 5003
-VPMOVZXBQYrm 5004
-VPMOVZXBQYrr 5005
-VPMOVZXBQZ 5006
-VPMOVZXBQZrm 5007
-VPMOVZXBQZrmk 5008
-VPMOVZXBQZrmkz 5009
-VPMOVZXBQZrr 5010
-VPMOVZXBQZrrk 5011
-VPMOVZXBQZrrkz 5012
-VPMOVZXBQrm 5013
-VPMOVZXBQrr 5014
-VPMOVZXBWYrm 5015
-VPMOVZXBWYrr 5016
-VPMOVZXBWZ 5017
-VPMOVZXBWZrm 5018
-VPMOVZXBWZrmk 5019
-VPMOVZXBWZrmkz 5020
-VPMOVZXBWZrr 5021
-VPMOVZXBWZrrk 5022
-VPMOVZXBWZrrkz 5023
-VPMOVZXBWrm 5024
-VPMOVZXBWrr 5025
-VPMOVZXDQYrm 5026
-VPMOVZXDQYrr 5027
-VPMOVZXDQZ 5028
-VPMOVZXDQZrm 5029
-VPMOVZXDQZrmk 5030
-VPMOVZXDQZrmkz 5031
-VPMOVZXDQZrr 5032
-VPMOVZXDQZrrk 5033
-VPMOVZXDQZrrkz 5034
-VPMOVZXDQrm 5035
-VPMOVZXDQrr 5036
-VPMOVZXWDYrm 5037
-VPMOVZXWDYrr 5038
-VPMOVZXWDZ 5039
-VPMOVZXWDZrm 5040
-VPMOVZXWDZrmk 5041
-VPMOVZXWDZrmkz 5042
-VPMOVZXWDZrr 5043
-VPMOVZXWDZrrk 5044
-VPMOVZXWDZrrkz 5045
-VPMOVZXWDrm 5046
-VPMOVZXWDrr 5047
-VPMOVZXWQYrm 5048
-VPMOVZXWQYrr 5049
-VPMOVZXWQZ 5050
-VPMOVZXWQZrm 5051
-VPMOVZXWQZrmk 5052
-VPMOVZXWQZrmkz 5053
-VPMOVZXWQZrr 5054
-VPMOVZXWQZrrk 5055
-VPMOVZXWQZrrkz 5056
-VPMOVZXWQrm 5057
-VPMOVZXWQrr 5058
-VPMULDQYrm 5059
-VPMULDQYrr 5060
-VPMULDQZ 5061
-VPMULDQZrm 5062
-VPMULDQZrmb 5063
-VPMULDQZrmbk 5064
-VPMULDQZrmbkz 5065
-VPMULDQZrmk 5066
-VPMULDQZrmkz 5067
-VPMULDQZrr 5068
-VPMULDQZrrk 5069
-VPMULDQZrrkz 5070
-VPMULDQrm 5071
-VPMULDQrr 5072
-VPMULHRSWYrm 5073
-VPMULHRSWYrr 5074
-VPMULHRSWZ 5075
-VPMULHRSWZrm 5076
-VPMULHRSWZrmk 5077
-VPMULHRSWZrmkz 5078
-VPMULHRSWZrr 5079
-VPMULHRSWZrrk 5080
-VPMULHRSWZrrkz 5081
-VPMULHRSWrm 5082
-VPMULHRSWrr 5083
-VPMULHUWYrm 5084
-VPMULHUWYrr 5085
-VPMULHUWZ 5086
-VPMULHUWZrm 5087
-VPMULHUWZrmk 5088
-VPMULHUWZrmkz 5089
-VPMULHUWZrr 5090
-VPMULHUWZrrk 5091
-VPMULHUWZrrkz 5092
-VPMULHUWrm 5093
-VPMULHUWrr 5094
-VPMULHWYrm 5095
-VPMULHWYrr 5096
-VPMULHWZ 5097
-VPMULHWZrm 5098
-VPMULHWZrmk 5099
-VPMULHWZrmkz 5100
-VPMULHWZrr 5101
-VPMULHWZrrk 5102
-VPMULHWZrrkz 5103
-VPMULHWrm 5104
-VPMULHWrr 5105
-VPMULLDYrm 5106
-VPMULLDYrr 5107
-VPMULLDZ 5108
-VPMULLDZrm 5109
-VPMULLDZrmb 5110
-VPMULLDZrmbk 5111
-VPMULLDZrmbkz 5112
-VPMULLDZrmk 5113
-VPMULLDZrmkz 5114
-VPMULLDZrr 5115
-VPMULLDZrrk 5116
-VPMULLDZrrkz 5117
-VPMULLDrm 5118
-VPMULLDrr 5119
-VPMULLQZ 5120
-VPMULLQZrm 5121
-VPMULLQZrmb 5122
-VPMULLQZrmbk 5123
-VPMULLQZrmbkz 5124
-VPMULLQZrmk 5125
-VPMULLQZrmkz 5126
-VPMULLQZrr 5127
-VPMULLQZrrk 5128
-VPMULLQZrrkz 5129
-VPMULLWYrm 5130
-VPMULLWYrr 5131
-VPMULLWZ 5132
-VPMULLWZrm 5133
-VPMULLWZrmk 5134
-VPMULLWZrmkz 5135
-VPMULLWZrr 5136
-VPMULLWZrrk 5137
-VPMULLWZrrkz 5138
-VPMULLWrm 5139
-VPMULLWrr 5140
-VPMULTISHIFTQBZ 5141
-VPMULTISHIFTQBZrm 5142
-VPMULTISHIFTQBZrmb 5143
-VPMULTISHIFTQBZrmbk 5144
-VPMULTISHIFTQBZrmbkz 5145
-VPMULTISHIFTQBZrmk 5146
-VPMULTISHIFTQBZrmkz 5147
-VPMULTISHIFTQBZrr 5148
-VPMULTISHIFTQBZrrk 5149
-VPMULTISHIFTQBZrrkz 5150
-VPMULUDQYrm 5151
-VPMULUDQYrr 5152
-VPMULUDQZ 5153
-VPMULUDQZrm 5154
-VPMULUDQZrmb 5155
-VPMULUDQZrmbk 5156
-VPMULUDQZrmbkz 5157
-VPMULUDQZrmk 5158
-VPMULUDQZrmkz 5159
-VPMULUDQZrr 5160
-VPMULUDQZrrk 5161
-VPMULUDQZrrkz 5162
-VPMULUDQrm 5163
-VPMULUDQrr 5164
-VPOPCNTBZ 5165
-VPOPCNTBZrm 5166
-VPOPCNTBZrmk 5167
-VPOPCNTBZrmkz 5168
-VPOPCNTBZrr 5169
-VPOPCNTBZrrk 5170
-VPOPCNTBZrrkz 5171
-VPOPCNTDZ 5172
-VPOPCNTDZrm 5173
-VPOPCNTDZrmb 5174
-VPOPCNTDZrmbk 5175
-VPOPCNTDZrmbkz 5176
-VPOPCNTDZrmk 5177
-VPOPCNTDZrmkz 5178
-VPOPCNTDZrr 5179
-VPOPCNTDZrrk 5180
-VPOPCNTDZrrkz 5181
-VPOPCNTQZ 5182
-VPOPCNTQZrm 5183
-VPOPCNTQZrmb 5184
-VPOPCNTQZrmbk 5185
-VPOPCNTQZrmbkz 5186
-VPOPCNTQZrmk 5187
-VPOPCNTQZrmkz 5188
-VPOPCNTQZrr 5189
-VPOPCNTQZrrk 5190
-VPOPCNTQZrrkz 5191
-VPOPCNTWZ 5192
-VPOPCNTWZrm 5193
-VPOPCNTWZrmk 5194
-VPOPCNTWZrmkz 5195
-VPOPCNTWZrr 5196
-VPOPCNTWZrrk 5197
-VPOPCNTWZrrkz 5198
-VPORDZ 5199
-VPORDZrm 5200
-VPORDZrmb 5201
-VPORDZrmbk 5202
-VPORDZrmbkz 5203
-VPORDZrmk 5204
-VPORDZrmkz 5205
-VPORDZrr 5206
-VPORDZrrk 5207
-VPORDZrrkz 5208
-VPORQZ 5209
-VPORQZrm 5210
-VPORQZrmb 5211
-VPORQZrmbk 5212
-VPORQZrmbkz 5213
-VPORQZrmk 5214
-VPORQZrmkz 5215
-VPORQZrr 5216
-VPORQZrrk 5217
-VPORQZrrkz 5218
-VPORYrm 5219
-VPORYrr 5220
-VPORrm 5221
-VPORrr 5222
-VPPERMrmr 5223
-VPPERMrrm 5224
-VPPERMrrr 5225
-VPPERMrrr_REV 5226
-VPROLDZ 5227
-VPROLDZmbi 5228
-VPROLDZmbik 5229
-VPROLDZmbikz 5230
-VPROLDZmi 5231
-VPROLDZmik 5232
-VPROLDZmikz 5233
-VPROLDZri 5234
-VPROLDZrik 5235
-VPROLDZrikz 5236
-VPROLQZ 5237
-VPROLQZmbi 5238
-VPROLQZmbik 5239
-VPROLQZmbikz 5240
-VPROLQZmi 5241
-VPROLQZmik 5242
-VPROLQZmikz 5243
-VPROLQZri 5244
-VPROLQZrik 5245
-VPROLQZrikz 5246
-VPROLVDZ 5247
-VPROLVDZrm 5248
-VPROLVDZrmb 5249
-VPROLVDZrmbk 5250
-VPROLVDZrmbkz 5251
-VPROLVDZrmk 5252
-VPROLVDZrmkz 5253
-VPROLVDZrr 5254
-VPROLVDZrrk 5255
-VPROLVDZrrkz 5256
-VPROLVQZ 5257
-VPROLVQZrm 5258
-VPROLVQZrmb 5259
-VPROLVQZrmbk 5260
-VPROLVQZrmbkz 5261
-VPROLVQZrmk 5262
-VPROLVQZrmkz 5263
-VPROLVQZrr 5264
-VPROLVQZrrk 5265
-VPROLVQZrrkz 5266
-VPRORDZ 5267
-VPRORDZmbi 5268
-VPRORDZmbik 5269
-VPRORDZmbikz 5270
-VPRORDZmi 5271
-VPRORDZmik 5272
-VPRORDZmikz 5273
-VPRORDZri 5274
-VPRORDZrik 5275
-VPRORDZrikz 5276
-VPRORQZ 5277
-VPRORQZmbi 5278
-VPRORQZmbik 5279
-VPRORQZmbikz 5280
-VPRORQZmi 5281
-VPRORQZmik 5282
-VPRORQZmikz 5283
-VPRORQZri 5284
-VPRORQZrik 5285
-VPRORQZrikz 5286
-VPRORVDZ 5287
-VPRORVDZrm 5288
-VPRORVDZrmb 5289
-VPRORVDZrmbk 5290
-VPRORVDZrmbkz 5291
-VPRORVDZrmk 5292
-VPRORVDZrmkz 5293
-VPRORVDZrr 5294
-VPRORVDZrrk 5295
-VPRORVDZrrkz 5296
-VPRORVQZ 5297
-VPRORVQZrm 5298
-VPRORVQZrmb 5299
-VPRORVQZrmbk 5300
-VPRORVQZrmbkz 5301
-VPRORVQZrmk 5302
-VPRORVQZrmkz 5303
-VPRORVQZrr 5304
-VPRORVQZrrk 5305
-VPRORVQZrrkz 5306
-VPROTBmi 5307
-VPROTBmr 5308
-VPROTBri 5309
-VPROTBrm 5310
-VPROTBrr 5311
-VPROTBrr_REV 5312
-VPROTDmi 5313
-VPROTDmr 5314
-VPROTDri 5315
-VPROTDrm 5316
-VPROTDrr 5317
-VPROTDrr_REV 5318
-VPROTQmi 5319
-VPROTQmr 5320
-VPROTQri 5321
-VPROTQrm 5322
-VPROTQrr 5323
-VPROTQrr_REV 5324
-VPROTWmi 5325
-VPROTWmr 5326
-VPROTWri 5327
-VPROTWrm 5328
-VPROTWrr 5329
-VPROTWrr_REV 5330
-VPSADBWYrm 5331
-VPSADBWYrr 5332
-VPSADBWZ 5333
-VPSADBWZrm 5334
-VPSADBWZrr 5335
-VPSADBWrm 5336
-VPSADBWrr 5337
-VPSCATTERDDZ 5338
-VPSCATTERDDZmr 5339
-VPSCATTERDQZ 5340
-VPSCATTERDQZmr 5341
-VPSCATTERQDZ 5342
-VPSCATTERQDZmr 5343
-VPSCATTERQQZ 5344
-VPSCATTERQQZmr 5345
-VPSHABmr 5346
-VPSHABrm 5347
-VPSHABrr 5348
-VPSHABrr_REV 5349
-VPSHADmr 5350
-VPSHADrm 5351
-VPSHADrr 5352
-VPSHADrr_REV 5353
-VPSHAQmr 5354
-VPSHAQrm 5355
-VPSHAQrr 5356
-VPSHAQrr_REV 5357
-VPSHAWmr 5358
-VPSHAWrm 5359
-VPSHAWrr 5360
-VPSHAWrr_REV 5361
-VPSHLBmr 5362
-VPSHLBrm 5363
-VPSHLBrr 5364
-VPSHLBrr_REV 5365
-VPSHLDDZ 5366
-VPSHLDDZrmbi 5367
-VPSHLDDZrmbik 5368
-VPSHLDDZrmbikz 5369
-VPSHLDDZrmi 5370
-VPSHLDDZrmik 5371
-VPSHLDDZrmikz 5372
-VPSHLDDZrri 5373
-VPSHLDDZrrik 5374
-VPSHLDDZrrikz 5375
-VPSHLDQZ 5376
-VPSHLDQZrmbi 5377
-VPSHLDQZrmbik 5378
-VPSHLDQZrmbikz 5379
-VPSHLDQZrmi 5380
-VPSHLDQZrmik 5381
-VPSHLDQZrmikz 5382
-VPSHLDQZrri 5383
-VPSHLDQZrrik 5384
-VPSHLDQZrrikz 5385
-VPSHLDVDZ 5386
-VPSHLDVDZm 5387
-VPSHLDVDZmb 5388
-VPSHLDVDZmbk 5389
-VPSHLDVDZmbkz 5390
-VPSHLDVDZmk 5391
-VPSHLDVDZmkz 5392
-VPSHLDVDZr 5393
-VPSHLDVDZrk 5394
-VPSHLDVDZrkz 5395
-VPSHLDVQZ 5396
-VPSHLDVQZm 5397
-VPSHLDVQZmb 5398
-VPSHLDVQZmbk 5399
-VPSHLDVQZmbkz 5400
-VPSHLDVQZmk 5401
-VPSHLDVQZmkz 5402
-VPSHLDVQZr 5403
-VPSHLDVQZrk 5404
-VPSHLDVQZrkz 5405
-VPSHLDVWZ 5406
-VPSHLDVWZm 5407
-VPSHLDVWZmk 5408
-VPSHLDVWZmkz 5409
-VPSHLDVWZr 5410
-VPSHLDVWZrk 5411
-VPSHLDVWZrkz 5412
-VPSHLDWZ 5413
-VPSHLDWZrmi 5414
-VPSHLDWZrmik 5415
-VPSHLDWZrmikz 5416
-VPSHLDWZrri 5417
-VPSHLDWZrrik 5418
-VPSHLDWZrrikz 5419
-VPSHLDmr 5420
-VPSHLDrm 5421
-VPSHLDrr 5422
-VPSHLDrr_REV 5423
-VPSHLQmr 5424
-VPSHLQrm 5425
-VPSHLQrr 5426
-VPSHLQrr_REV 5427
-VPSHLWmr 5428
-VPSHLWrm 5429
-VPSHLWrr 5430
-VPSHLWrr_REV 5431
-VPSHRDDZ 5432
-VPSHRDDZrmbi 5433
-VPSHRDDZrmbik 5434
-VPSHRDDZrmbikz 5435
-VPSHRDDZrmi 5436
-VPSHRDDZrmik 5437
-VPSHRDDZrmikz 5438
-VPSHRDDZrri 5439
-VPSHRDDZrrik 5440
-VPSHRDDZrrikz 5441
-VPSHRDQZ 5442
-VPSHRDQZrmbi 5443
-VPSHRDQZrmbik 5444
-VPSHRDQZrmbikz 5445
-VPSHRDQZrmi 5446
-VPSHRDQZrmik 5447
-VPSHRDQZrmikz 5448
-VPSHRDQZrri 5449
-VPSHRDQZrrik 5450
-VPSHRDQZrrikz 5451
-VPSHRDVDZ 5452
-VPSHRDVDZm 5453
-VPSHRDVDZmb 5454
-VPSHRDVDZmbk 5455
-VPSHRDVDZmbkz 5456
-VPSHRDVDZmk 5457
-VPSHRDVDZmkz 5458
-VPSHRDVDZr 5459
-VPSHRDVDZrk 5460
-VPSHRDVDZrkz 5461
-VPSHRDVQZ 5462
-VPSHRDVQZm 5463
-VPSHRDVQZmb 5464
-VPSHRDVQZmbk 5465
-VPSHRDVQZmbkz 5466
-VPSHRDVQZmk 5467
-VPSHRDVQZmkz 5468
-VPSHRDVQZr 5469
-VPSHRDVQZrk 5470
-VPSHRDVQZrkz 5471
-VPSHRDVWZ 5472
-VPSHRDVWZm 5473
-VPSHRDVWZmk 5474
-VPSHRDVWZmkz 5475
-VPSHRDVWZr 5476
-VPSHRDVWZrk 5477
-VPSHRDVWZrkz 5478
-VPSHRDWZ 5479
-VPSHRDWZrmi 5480
-VPSHRDWZrmik 5481
-VPSHRDWZrmikz 5482
-VPSHRDWZrri 5483
-VPSHRDWZrrik 5484
-VPSHRDWZrrikz 5485
-VPSHUFBITQMBZ 5486
-VPSHUFBITQMBZrm 5487
-VPSHUFBITQMBZrmk 5488
-VPSHUFBITQMBZrr 5489
-VPSHUFBITQMBZrrk 5490
-VPSHUFBYrm 5491
-VPSHUFBYrr 5492
-VPSHUFBZ 5493
-VPSHUFBZrm 5494
-VPSHUFBZrmk 5495
-VPSHUFBZrmkz 5496
-VPSHUFBZrr 5497
-VPSHUFBZrrk 5498
-VPSHUFBZrrkz 5499
-VPSHUFBrm 5500
-VPSHUFBrr 5501
-VPSHUFDYmi 5502
-VPSHUFDYri 5503
-VPSHUFDZ 5504
-VPSHUFDZmbi 5505
-VPSHUFDZmbik 5506
-VPSHUFDZmbikz 5507
-VPSHUFDZmi 5508
-VPSHUFDZmik 5509
-VPSHUFDZmikz 5510
-VPSHUFDZri 5511
-VPSHUFDZrik 5512
-VPSHUFDZrikz 5513
-VPSHUFDmi 5514
-VPSHUFDri 5515
-VPSHUFHWYmi 5516
-VPSHUFHWYri 5517
-VPSHUFHWZ 5518
-VPSHUFHWZmi 5519
-VPSHUFHWZmik 5520
-VPSHUFHWZmikz 5521
-VPSHUFHWZri 5522
-VPSHUFHWZrik 5523
-VPSHUFHWZrikz 5524
-VPSHUFHWmi 5525
-VPSHUFHWri 5526
-VPSHUFLWYmi 5527
-VPSHUFLWYri 5528
-VPSHUFLWZ 5529
-VPSHUFLWZmi 5530
-VPSHUFLWZmik 5531
-VPSHUFLWZmikz 5532
-VPSHUFLWZri 5533
-VPSHUFLWZrik 5534
-VPSHUFLWZrikz 5535
-VPSHUFLWmi 5536
-VPSHUFLWri 5537
-VPSIGNBYrm 5538
-VPSIGNBYrr 5539
-VPSIGNBrm 5540
-VPSIGNBrr 5541
-VPSIGNDYrm 5542
-VPSIGNDYrr 5543
-VPSIGNDrm 5544
-VPSIGNDrr 5545
-VPSIGNWYrm 5546
-VPSIGNWYrr 5547
-VPSIGNWrm 5548
-VPSIGNWrr 5549
-VPSLLDQYri 5550
-VPSLLDQZ 5551
-VPSLLDQZmi 5552
-VPSLLDQZri 5553
-VPSLLDQri 5554
-VPSLLDYri 5555
-VPSLLDYrm 5556
-VPSLLDYrr 5557
-VPSLLDZ 5558
-VPSLLDZmbi 5559
-VPSLLDZmbik 5560
-VPSLLDZmbikz 5561
-VPSLLDZmi 5562
-VPSLLDZmik 5563
-VPSLLDZmikz 5564
-VPSLLDZri 5565
-VPSLLDZrik 5566
-VPSLLDZrikz 5567
-VPSLLDZrm 5568
-VPSLLDZrmk 5569
-VPSLLDZrmkz 5570
-VPSLLDZrr 5571
-VPSLLDZrrk 5572
-VPSLLDZrrkz 5573
-VPSLLDri 5574
-VPSLLDrm 5575
-VPSLLDrr 5576
-VPSLLQYri 5577
-VPSLLQYrm 5578
-VPSLLQYrr 5579
-VPSLLQZ 5580
-VPSLLQZmbi 5581
-VPSLLQZmbik 5582
-VPSLLQZmbikz 5583
-VPSLLQZmi 5584
-VPSLLQZmik 5585
-VPSLLQZmikz 5586
-VPSLLQZri 5587
-VPSLLQZrik 5588
-VPSLLQZrikz 5589
-VPSLLQZrm 5590
-VPSLLQZrmk 5591
-VPSLLQZrmkz 5592
-VPSLLQZrr 5593
-VPSLLQZrrk 5594
-VPSLLQZrrkz 5595
-VPSLLQri 5596
-VPSLLQrm 5597
-VPSLLQrr 5598
-VPSLLVDYrm 5599
-VPSLLVDYrr 5600
-VPSLLVDZ 5601
-VPSLLVDZrm 5602
-VPSLLVDZrmb 5603
-VPSLLVDZrmbk 5604
-VPSLLVDZrmbkz 5605
-VPSLLVDZrmk 5606
-VPSLLVDZrmkz 5607
-VPSLLVDZrr 5608
-VPSLLVDZrrk 5609
-VPSLLVDZrrkz 5610
-VPSLLVDrm 5611
-VPSLLVDrr 5612
-VPSLLVQYrm 5613
-VPSLLVQYrr 5614
-VPSLLVQZ 5615
-VPSLLVQZrm 5616
-VPSLLVQZrmb 5617
-VPSLLVQZrmbk 5618
-VPSLLVQZrmbkz 5619
-VPSLLVQZrmk 5620
-VPSLLVQZrmkz 5621
-VPSLLVQZrr 5622
-VPSLLVQZrrk 5623
-VPSLLVQZrrkz 5624
-VPSLLVQrm 5625
-VPSLLVQrr 5626
-VPSLLVWZ 5627
-VPSLLVWZrm 5628
-VPSLLVWZrmk 5629
-VPSLLVWZrmkz 5630
-VPSLLVWZrr 5631
-VPSLLVWZrrk 5632
-VPSLLVWZrrkz 5633
-VPSLLWYri 5634
-VPSLLWYrm 5635
-VPSLLWYrr 5636
-VPSLLWZ 5637
-VPSLLWZmi 5638
-VPSLLWZmik 5639
-VPSLLWZmikz 5640
-VPSLLWZri 5641
-VPSLLWZrik 5642
-VPSLLWZrikz 5643
-VPSLLWZrm 5644
-VPSLLWZrmk 5645
-VPSLLWZrmkz 5646
-VPSLLWZrr 5647
-VPSLLWZrrk 5648
-VPSLLWZrrkz 5649
-VPSLLWri 5650
-VPSLLWrm 5651
-VPSLLWrr 5652
-VPSRADYri 5653
-VPSRADYrm 5654
-VPSRADYrr 5655
-VPSRADZ 5656
-VPSRADZmbi 5657
-VPSRADZmbik 5658
-VPSRADZmbikz 5659
-VPSRADZmi 5660
-VPSRADZmik 5661
-VPSRADZmikz 5662
-VPSRADZri 5663
-VPSRADZrik 5664
-VPSRADZrikz 5665
-VPSRADZrm 5666
-VPSRADZrmk 5667
-VPSRADZrmkz 5668
-VPSRADZrr 5669
-VPSRADZrrk 5670
-VPSRADZrrkz 5671
-VPSRADri 5672
-VPSRADrm 5673
-VPSRADrr 5674
-VPSRAQZ 5675
-VPSRAQZmbi 5676
-VPSRAQZmbik 5677
-VPSRAQZmbikz 5678
-VPSRAQZmi 5679
-VPSRAQZmik 5680
-VPSRAQZmikz 5681
-VPSRAQZri 5682
-VPSRAQZrik 5683
-VPSRAQZrikz 5684
-VPSRAQZrm 5685
-VPSRAQZrmk 5686
-VPSRAQZrmkz 5687
-VPSRAQZrr 5688
-VPSRAQZrrk 5689
-VPSRAQZrrkz 5690
-VPSRAVDYrm 5691
-VPSRAVDYrr 5692
-VPSRAVDZ 5693
-VPSRAVDZrm 5694
-VPSRAVDZrmb 5695
-VPSRAVDZrmbk 5696
-VPSRAVDZrmbkz 5697
-VPSRAVDZrmk 5698
-VPSRAVDZrmkz 5699
-VPSRAVDZrr 5700
-VPSRAVDZrrk 5701
-VPSRAVDZrrkz 5702
-VPSRAVDrm 5703
-VPSRAVDrr 5704
-VPSRAVQZ 5705
-VPSRAVQZrm 5706
-VPSRAVQZrmb 5707
-VPSRAVQZrmbk 5708
-VPSRAVQZrmbkz 5709
-VPSRAVQZrmk 5710
-VPSRAVQZrmkz 5711
-VPSRAVQZrr 5712
-VPSRAVQZrrk 5713
-VPSRAVQZrrkz 5714
-VPSRAVWZ 5715
-VPSRAVWZrm 5716
-VPSRAVWZrmk 5717
-VPSRAVWZrmkz 5718
-VPSRAVWZrr 5719
-VPSRAVWZrrk 5720
-VPSRAVWZrrkz 5721
-VPSRAWYri 5722
-VPSRAWYrm 5723
-VPSRAWYrr 5724
-VPSRAWZ 5725
-VPSRAWZmi 5726
-VPSRAWZmik 5727
-VPSRAWZmikz 5728
-VPSRAWZri 5729
-VPSRAWZrik 5730
-VPSRAWZrikz 5731
-VPSRAWZrm 5732
-VPSRAWZrmk 5733
-VPSRAWZrmkz 5734
-VPSRAWZrr 5735
-VPSRAWZrrk 5736
-VPSRAWZrrkz 5737
-VPSRAWri 5738
-VPSRAWrm 5739
-VPSRAWrr 5740
-VPSRLDQYri 5741
-VPSRLDQZ 5742
-VPSRLDQZmi 5743
-VPSRLDQZri 5744
-VPSRLDQri 5745
-VPSRLDYri 5746
-VPSRLDYrm 5747
-VPSRLDYrr 5748
-VPSRLDZ 5749
-VPSRLDZmbi 5750
-VPSRLDZmbik 5751
-VPSRLDZmbikz 5752
-VPSRLDZmi 5753
-VPSRLDZmik 5754
-VPSRLDZmikz 5755
-VPSRLDZri 5756
-VPSRLDZrik 5757
-VPSRLDZrikz 5758
-VPSRLDZrm 5759
-VPSRLDZrmk 5760
-VPSRLDZrmkz 5761
-VPSRLDZrr 5762
-VPSRLDZrrk 5763
-VPSRLDZrrkz 5764
-VPSRLDri 5765
-VPSRLDrm 5766
-VPSRLDrr 5767
-VPSRLQYri 5768
-VPSRLQYrm 5769
-VPSRLQYrr 5770
-VPSRLQZ 5771
-VPSRLQZmbi 5772
-VPSRLQZmbik 5773
-VPSRLQZmbikz 5774
-VPSRLQZmi 5775
-VPSRLQZmik 5776
-VPSRLQZmikz 5777
-VPSRLQZri 5778
-VPSRLQZrik 5779
-VPSRLQZrikz 5780
-VPSRLQZrm 5781
-VPSRLQZrmk 5782
-VPSRLQZrmkz 5783
-VPSRLQZrr 5784
-VPSRLQZrrk 5785
-VPSRLQZrrkz 5786
-VPSRLQri 5787
-VPSRLQrm 5788
-VPSRLQrr 5789
-VPSRLVDYrm 5790
-VPSRLVDYrr 5791
-VPSRLVDZ 5792
-VPSRLVDZrm 5793
-VPSRLVDZrmb 5794
-VPSRLVDZrmbk 5795
-VPSRLVDZrmbkz 5796
-VPSRLVDZrmk 5797
-VPSRLVDZrmkz 5798
-VPSRLVDZrr 5799
-VPSRLVDZrrk 5800
-VPSRLVDZrrkz 5801
-VPSRLVDrm 5802
-VPSRLVDrr 5803
-VPSRLVQYrm 5804
-VPSRLVQYrr 5805
-VPSRLVQZ 5806
-VPSRLVQZrm 5807
-VPSRLVQZrmb 5808
-VPSRLVQZrmbk 5809
-VPSRLVQZrmbkz 5810
-VPSRLVQZrmk 5811
-VPSRLVQZrmkz 5812
-VPSRLVQZrr 5813
-VPSRLVQZrrk 5814
-VPSRLVQZrrkz 5815
-VPSRLVQrm 5816
-VPSRLVQrr 5817
-VPSRLVWZ 5818
-VPSRLVWZrm 5819
-VPSRLVWZrmk 5820
-VPSRLVWZrmkz 5821
-VPSRLVWZrr 5822
-VPSRLVWZrrk 5823
-VPSRLVWZrrkz 5824
-VPSRLWYri 5825
-VPSRLWYrm 5826
-VPSRLWYrr 5827
-VPSRLWZ 5828
-VPSRLWZmi 5829
-VPSRLWZmik 5830
-VPSRLWZmikz 5831
-VPSRLWZri 5832
-VPSRLWZrik 5833
-VPSRLWZrikz 5834
-VPSRLWZrm 5835
-VPSRLWZrmk 5836
-VPSRLWZrmkz 5837
-VPSRLWZrr 5838
-VPSRLWZrrk 5839
-VPSRLWZrrkz 5840
-VPSRLWri 5841
-VPSRLWrm 5842
-VPSRLWrr 5843
-VPSUBBYrm 5844
-VPSUBBYrr 5845
-VPSUBBZ 5846
-VPSUBBZrm 5847
-VPSUBBZrmk 5848
-VPSUBBZrmkz 5849
-VPSUBBZrr 5850
-VPSUBBZrrk 5851
-VPSUBBZrrkz 5852
-VPSUBBrm 5853
-VPSUBBrr 5854
-VPSUBDYrm 5855
-VPSUBDYrr 5856
-VPSUBDZ 5857
-VPSUBDZrm 5858
-VPSUBDZrmb 5859
-VPSUBDZrmbk 5860
-VPSUBDZrmbkz 5861
-VPSUBDZrmk 5862
-VPSUBDZrmkz 5863
-VPSUBDZrr 5864
-VPSUBDZrrk 5865
-VPSUBDZrrkz 5866
-VPSUBDrm 5867
-VPSUBDrr 5868
-VPSUBQYrm 5869
-VPSUBQYrr 5870
-VPSUBQZ 5871
-VPSUBQZrm 5872
-VPSUBQZrmb 5873
-VPSUBQZrmbk 5874
-VPSUBQZrmbkz 5875
-VPSUBQZrmk 5876
-VPSUBQZrmkz 5877
-VPSUBQZrr 5878
-VPSUBQZrrk 5879
-VPSUBQZrrkz 5880
-VPSUBQrm 5881
-VPSUBQrr 5882
-VPSUBSBYrm 5883
-VPSUBSBYrr 5884
-VPSUBSBZ 5885
-VPSUBSBZrm 5886
-VPSUBSBZrmk 5887
-VPSUBSBZrmkz 5888
-VPSUBSBZrr 5889
-VPSUBSBZrrk 5890
-VPSUBSBZrrkz 5891
-VPSUBSBrm 5892
-VPSUBSBrr 5893
-VPSUBSWYrm 5894
-VPSUBSWYrr 5895
-VPSUBSWZ 5896
-VPSUBSWZrm 5897
-VPSUBSWZrmk 5898
-VPSUBSWZrmkz 5899
-VPSUBSWZrr 5900
-VPSUBSWZrrk 5901
-VPSUBSWZrrkz 5902
-VPSUBSWrm 5903
-VPSUBSWrr 5904
-VPSUBUSBYrm 5905
-VPSUBUSBYrr 5906
-VPSUBUSBZ 5907
-VPSUBUSBZrm 5908
-VPSUBUSBZrmk 5909
-VPSUBUSBZrmkz 5910
-VPSUBUSBZrr 5911
-VPSUBUSBZrrk 5912
-VPSUBUSBZrrkz 5913
-VPSUBUSBrm 5914
-VPSUBUSBrr 5915
-VPSUBUSWYrm 5916
-VPSUBUSWYrr 5917
-VPSUBUSWZ 5918
-VPSUBUSWZrm 5919
-VPSUBUSWZrmk 5920
-VPSUBUSWZrmkz 5921
-VPSUBUSWZrr 5922
-VPSUBUSWZrrk 5923
-VPSUBUSWZrrkz 5924
-VPSUBUSWrm 5925
-VPSUBUSWrr 5926
-VPSUBWYrm 5927
-VPSUBWYrr 5928
-VPSUBWZ 5929
-VPSUBWZrm 5930
-VPSUBWZrmk 5931
-VPSUBWZrmkz 5932
-VPSUBWZrr 5933
-VPSUBWZrrk 5934
-VPSUBWZrrkz 5935
-VPSUBWrm 5936
-VPSUBWrr 5937
-VPTERNLOGDZ 5938
-VPTERNLOGDZrmbi 5939
-VPTERNLOGDZrmbik 5940
-VPTERNLOGDZrmbikz 5941
-VPTERNLOGDZrmi 5942
-VPTERNLOGDZrmik 5943
-VPTERNLOGDZrmikz 5944
-VPTERNLOGDZrri 5945
-VPTERNLOGDZrrik 5946
-VPTERNLOGDZrrikz 5947
-VPTERNLOGQZ 5948
-VPTERNLOGQZrmbi 5949
-VPTERNLOGQZrmbik 5950
-VPTERNLOGQZrmbikz 5951
-VPTERNLOGQZrmi 5952
-VPTERNLOGQZrmik 5953
-VPTERNLOGQZrmikz 5954
-VPTERNLOGQZrri 5955
-VPTERNLOGQZrrik 5956
-VPTERNLOGQZrrikz 5957
-VPTESTMBZ 5958
-VPTESTMBZrm 5959
-VPTESTMBZrmk 5960
-VPTESTMBZrr 5961
-VPTESTMBZrrk 5962
-VPTESTMDZ 5963
-VPTESTMDZrm 5964
-VPTESTMDZrmb 5965
-VPTESTMDZrmbk 5966
-VPTESTMDZrmk 5967
-VPTESTMDZrr 5968
-VPTESTMDZrrk 5969
-VPTESTMQZ 5970
-VPTESTMQZrm 5971
-VPTESTMQZrmb 5972
-VPTESTMQZrmbk 5973
-VPTESTMQZrmk 5974
-VPTESTMQZrr 5975
-VPTESTMQZrrk 5976
-VPTESTMWZ 5977
-VPTESTMWZrm 5978
-VPTESTMWZrmk 5979
-VPTESTMWZrr 5980
-VPTESTMWZrrk 5981
-VPTESTNMBZ 5982
-VPTESTNMBZrm 5983
-VPTESTNMBZrmk 5984
-VPTESTNMBZrr 5985
-VPTESTNMBZrrk 5986
-VPTESTNMDZ 5987
-VPTESTNMDZrm 5988
-VPTESTNMDZrmb 5989
-VPTESTNMDZrmbk 5990
-VPTESTNMDZrmk 5991
-VPTESTNMDZrr 5992
-VPTESTNMDZrrk 5993
-VPTESTNMQZ 5994
-VPTESTNMQZrm 5995
-VPTESTNMQZrmb 5996
-VPTESTNMQZrmbk 5997
-VPTESTNMQZrmk 5998
-VPTESTNMQZrr 5999
-VPTESTNMQZrrk 6000
-VPTESTNMWZ 6001
-VPTESTNMWZrm 6002
-VPTESTNMWZrmk 6003
-VPTESTNMWZrr 6004
-VPTESTNMWZrrk 6005
-VPTESTYrm 6006
-VPTESTYrr 6007
-VPTESTrm 6008
-VPTESTrr 6009
-VPUNPCKHBWYrm 6010
-VPUNPCKHBWYrr 6011
-VPUNPCKHBWZ 6012
-VPUNPCKHBWZrm 6013
-VPUNPCKHBWZrmk 6014
-VPUNPCKHBWZrmkz 6015
-VPUNPCKHBWZrr 6016
-VPUNPCKHBWZrrk 6017
-VPUNPCKHBWZrrkz 6018
-VPUNPCKHBWrm 6019
-VPUNPCKHBWrr 6020
-VPUNPCKHDQYrm 6021
-VPUNPCKHDQYrr 6022
-VPUNPCKHDQZ 6023
-VPUNPCKHDQZrm 6024
-VPUNPCKHDQZrmb 6025
-VPUNPCKHDQZrmbk 6026
-VPUNPCKHDQZrmbkz 6027
-VPUNPCKHDQZrmk 6028
-VPUNPCKHDQZrmkz 6029
-VPUNPCKHDQZrr 6030
-VPUNPCKHDQZrrk 6031
-VPUNPCKHDQZrrkz 6032
-VPUNPCKHDQrm 6033
-VPUNPCKHDQrr 6034
-VPUNPCKHQDQYrm 6035
-VPUNPCKHQDQYrr 6036
-VPUNPCKHQDQZ 6037
-VPUNPCKHQDQZrm 6038
-VPUNPCKHQDQZrmb 6039
-VPUNPCKHQDQZrmbk 6040
-VPUNPCKHQDQZrmbkz 6041
-VPUNPCKHQDQZrmk 6042
-VPUNPCKHQDQZrmkz 6043
-VPUNPCKHQDQZrr 6044
-VPUNPCKHQDQZrrk 6045
-VPUNPCKHQDQZrrkz 6046
-VPUNPCKHQDQrm 6047
-VPUNPCKHQDQrr 6048
-VPUNPCKHWDYrm 6049
-VPUNPCKHWDYrr 6050
-VPUNPCKHWDZ 6051
-VPUNPCKHWDZrm 6052
-VPUNPCKHWDZrmk 6053
-VPUNPCKHWDZrmkz 6054
-VPUNPCKHWDZrr 6055
-VPUNPCKHWDZrrk 6056
-VPUNPCKHWDZrrkz 6057
-VPUNPCKHWDrm 6058
-VPUNPCKHWDrr 6059
-VPUNPCKLBWYrm 6060
-VPUNPCKLBWYrr 6061
-VPUNPCKLBWZ 6062
-VPUNPCKLBWZrm 6063
-VPUNPCKLBWZrmk 6064
-VPUNPCKLBWZrmkz 6065
-VPUNPCKLBWZrr 6066
-VPUNPCKLBWZrrk 6067
-VPUNPCKLBWZrrkz 6068
-VPUNPCKLBWrm 6069
-VPUNPCKLBWrr 6070
-VPUNPCKLDQYrm 6071
-VPUNPCKLDQYrr 6072
-VPUNPCKLDQZ 6073
-VPUNPCKLDQZrm 6074
-VPUNPCKLDQZrmb 6075
-VPUNPCKLDQZrmbk 6076
-VPUNPCKLDQZrmbkz 6077
-VPUNPCKLDQZrmk 6078
-VPUNPCKLDQZrmkz 6079
-VPUNPCKLDQZrr 6080
-VPUNPCKLDQZrrk 6081
-VPUNPCKLDQZrrkz 6082
-VPUNPCKLDQrm 6083
-VPUNPCKLDQrr 6084
-VPUNPCKLQDQYrm 6085
-VPUNPCKLQDQYrr 6086
-VPUNPCKLQDQZ 6087
-VPUNPCKLQDQZrm 6088
-VPUNPCKLQDQZrmb 6089
-VPUNPCKLQDQZrmbk 6090
-VPUNPCKLQDQZrmbkz 6091
-VPUNPCKLQDQZrmk 6092
-VPUNPCKLQDQZrmkz 6093
-VPUNPCKLQDQZrr 6094
-VPUNPCKLQDQZrrk 6095
-VPUNPCKLQDQZrrkz 6096
-VPUNPCKLQDQrm 6097
-VPUNPCKLQDQrr 6098
-VPUNPCKLWDYrm 6099
-VPUNPCKLWDYrr 6100
-VPUNPCKLWDZ 6101
-VPUNPCKLWDZrm 6102
-VPUNPCKLWDZrmk 6103
-VPUNPCKLWDZrmkz 6104
-VPUNPCKLWDZrr 6105
-VPUNPCKLWDZrrk 6106
-VPUNPCKLWDZrrkz 6107
-VPUNPCKLWDrm 6108
-VPUNPCKLWDrr 6109
-VPXORDZ 6110
-VPXORDZrm 6111
-VPXORDZrmb 6112
-VPXORDZrmbk 6113
-VPXORDZrmbkz 6114
-VPXORDZrmk 6115
-VPXORDZrmkz 6116
-VPXORDZrr 6117
-VPXORDZrrk 6118
-VPXORDZrrkz 6119
-VPXORQZ 6120
-VPXORQZrm 6121
-VPXORQZrmb 6122
-VPXORQZrmbk 6123
-VPXORQZrmbkz 6124
-VPXORQZrmk 6125
-VPXORQZrmkz 6126
-VPXORQZrr 6127
-VPXORQZrrk 6128
-VPXORQZrrkz 6129
-VPXORYrm 6130
-VPXORYrr 6131
-VPXORrm 6132
-VPXORrr 6133
-VRANGEPDZ 6134
-VRANGEPDZrmbi 6135
-VRANGEPDZrmbik 6136
-VRANGEPDZrmbikz 6137
-VRANGEPDZrmi 6138
-VRANGEPDZrmik 6139
-VRANGEPDZrmikz 6140
-VRANGEPDZrri 6141
-VRANGEPDZrrib 6142
-VRANGEPDZrribk 6143
-VRANGEPDZrribkz 6144
-VRANGEPDZrrik 6145
-VRANGEPDZrrikz 6146
-VRANGEPSZ 6147
-VRANGEPSZrmbi 6148
-VRANGEPSZrmbik 6149
-VRANGEPSZrmbikz 6150
-VRANGEPSZrmi 6151
-VRANGEPSZrmik 6152
-VRANGEPSZrmikz 6153
-VRANGEPSZrri 6154
-VRANGEPSZrrib 6155
-VRANGEPSZrribk 6156
-VRANGEPSZrribkz 6157
-VRANGEPSZrrik 6158
-VRANGEPSZrrikz 6159
-VRANGESDZrmi 6160
-VRANGESDZrmik 6161
-VRANGESDZrmikz 6162
-VRANGESDZrri 6163
-VRANGESDZrrib 6164
-VRANGESDZrribk 6165
-VRANGESDZrribkz 6166
-VRANGESDZrrik 6167
-VRANGESDZrrikz 6168
-VRANGESSZrmi 6169
-VRANGESSZrmik 6170
-VRANGESSZrmikz 6171
-VRANGESSZrri 6172
-VRANGESSZrrib 6173
-VRANGESSZrribk 6174
-VRANGESSZrribkz 6175
-VRANGESSZrrik 6176
-VRANGESSZrrikz 6177
-VRCP 6178
-VRCPBF 6179
-VRCPPHZ 6180
-VRCPPHZm 6181
-VRCPPHZmb 6182
-VRCPPHZmbk 6183
-VRCPPHZmbkz 6184
-VRCPPHZmk 6185
-VRCPPHZmkz 6186
-VRCPPHZr 6187
-VRCPPHZrk 6188
-VRCPPHZrkz 6189
-VRCPPSYm 6190
-VRCPPSYr 6191
-VRCPPSm 6192
-VRCPPSr 6193
-VRCPSHZrm 6194
-VRCPSHZrmk 6195
-VRCPSHZrmkz 6196
-VRCPSHZrr 6197
-VRCPSHZrrk 6198
-VRCPSHZrrkz 6199
-VRCPSSm 6200
-VRCPSSm_Int 6201
-VRCPSSr 6202
-VRCPSSr_Int 6203
-VREDUCEBF 6204
-VREDUCEPDZ 6205
-VREDUCEPDZrmbi 6206
-VREDUCEPDZrmbik 6207
-VREDUCEPDZrmbikz 6208
-VREDUCEPDZrmi 6209
-VREDUCEPDZrmik 6210
-VREDUCEPDZrmikz 6211
-VREDUCEPDZrri 6212
-VREDUCEPDZrrib 6213
-VREDUCEPDZrribk 6214
-VREDUCEPDZrribkz 6215
-VREDUCEPDZrrik 6216
-VREDUCEPDZrrikz 6217
-VREDUCEPHZ 6218
-VREDUCEPHZrmbi 6219
-VREDUCEPHZrmbik 6220
-VREDUCEPHZrmbikz 6221
-VREDUCEPHZrmi 6222
-VREDUCEPHZrmik 6223
-VREDUCEPHZrmikz 6224
-VREDUCEPHZrri 6225
-VREDUCEPHZrrib 6226
-VREDUCEPHZrribk 6227
-VREDUCEPHZrribkz 6228
-VREDUCEPHZrrik 6229
-VREDUCEPHZrrikz 6230
-VREDUCEPSZ 6231
-VREDUCEPSZrmbi 6232
-VREDUCEPSZrmbik 6233
-VREDUCEPSZrmbikz 6234
-VREDUCEPSZrmi 6235
-VREDUCEPSZrmik 6236
-VREDUCEPSZrmikz 6237
-VREDUCEPSZrri 6238
-VREDUCEPSZrrib 6239
-VREDUCEPSZrribk 6240
-VREDUCEPSZrribkz 6241
-VREDUCEPSZrrik 6242
-VREDUCEPSZrrikz 6243
-VREDUCESDZrmi 6244
-VREDUCESDZrmik 6245
-VREDUCESDZrmikz 6246
-VREDUCESDZrri 6247
-VREDUCESDZrrib 6248
-VREDUCESDZrribk 6249
-VREDUCESDZrribkz 6250
-VREDUCESDZrrik 6251
-VREDUCESDZrrikz 6252
-VREDUCESHZrmi 6253
-VREDUCESHZrmik 6254
-VREDUCESHZrmikz 6255
-VREDUCESHZrri 6256
-VREDUCESHZrrib 6257
-VREDUCESHZrribk 6258
-VREDUCESHZrribkz 6259
-VREDUCESHZrrik 6260
-VREDUCESHZrrikz 6261
-VREDUCESSZrmi 6262
-VREDUCESSZrmik 6263
-VREDUCESSZrmikz 6264
-VREDUCESSZrri 6265
-VREDUCESSZrrib 6266
-VREDUCESSZrribk 6267
-VREDUCESSZrribkz 6268
-VREDUCESSZrrik 6269
-VREDUCESSZrrikz 6270
-VRNDSCALEBF 6271
-VRNDSCALEPDZ 6272
-VRNDSCALEPDZrmbi 6273
-VRNDSCALEPDZrmbik 6274
-VRNDSCALEPDZrmbikz 6275
-VRNDSCALEPDZrmi 6276
-VRNDSCALEPDZrmik 6277
-VRNDSCALEPDZrmikz 6278
-VRNDSCALEPDZrri 6279
-VRNDSCALEPDZrrib 6280
-VRNDSCALEPDZrribk 6281
-VRNDSCALEPDZrribkz 6282
-VRNDSCALEPDZrrik 6283
-VRNDSCALEPDZrrikz 6284
-VRNDSCALEPHZ 6285
-VRNDSCALEPHZrmbi 6286
-VRNDSCALEPHZrmbik 6287
-VRNDSCALEPHZrmbikz 6288
-VRNDSCALEPHZrmi 6289
-VRNDSCALEPHZrmik 6290
-VRNDSCALEPHZrmikz 6291
-VRNDSCALEPHZrri 6292
-VRNDSCALEPHZrrib 6293
-VRNDSCALEPHZrribk 6294
-VRNDSCALEPHZrribkz 6295
-VRNDSCALEPHZrrik 6296
-VRNDSCALEPHZrrikz 6297
-VRNDSCALEPSZ 6298
-VRNDSCALEPSZrmbi 6299
-VRNDSCALEPSZrmbik 6300
-VRNDSCALEPSZrmbikz 6301
-VRNDSCALEPSZrmi 6302
-VRNDSCALEPSZrmik 6303
-VRNDSCALEPSZrmikz 6304
-VRNDSCALEPSZrri 6305
-VRNDSCALEPSZrrib 6306
-VRNDSCALEPSZrribk 6307
-VRNDSCALEPSZrribkz 6308
-VRNDSCALEPSZrrik 6309
-VRNDSCALEPSZrrikz 6310
-VRNDSCALESDZrmi 6311
-VRNDSCALESDZrmi_Int 6312
-VRNDSCALESDZrmik_Int 6313
-VRNDSCALESDZrmikz_Int 6314
-VRNDSCALESDZrri 6315
-VRNDSCALESDZrri_Int 6316
-VRNDSCALESDZrrib_Int 6317
-VRNDSCALESDZrribk_Int 6318
-VRNDSCALESDZrribkz_Int 6319
-VRNDSCALESDZrrik_Int 6320
-VRNDSCALESDZrrikz_Int 6321
-VRNDSCALESHZrmi 6322
-VRNDSCALESHZrmi_Int 6323
-VRNDSCALESHZrmik_Int 6324
-VRNDSCALESHZrmikz_Int 6325
-VRNDSCALESHZrri 6326
-VRNDSCALESHZrri_Int 6327
-VRNDSCALESHZrrib_Int 6328
-VRNDSCALESHZrribk_Int 6329
-VRNDSCALESHZrribkz_Int 6330
-VRNDSCALESHZrrik_Int 6331
-VRNDSCALESHZrrikz_Int 6332
-VRNDSCALESSZrmi 6333
-VRNDSCALESSZrmi_Int 6334
-VRNDSCALESSZrmik_Int 6335
-VRNDSCALESSZrmikz_Int 6336
-VRNDSCALESSZrri 6337
-VRNDSCALESSZrri_Int 6338
-VRNDSCALESSZrrib_Int 6339
-VRNDSCALESSZrribk_Int 6340
-VRNDSCALESSZrribkz_Int 6341
-VRNDSCALESSZrrik_Int 6342
-VRNDSCALESSZrrikz_Int 6343
-VROUNDPDYmi 6344
-VROUNDPDYri 6345
-VROUNDPDmi 6346
-VROUNDPDri 6347
-VROUNDPSYmi 6348
-VROUNDPSYri 6349
-VROUNDPSmi 6350
-VROUNDPSri 6351
-VROUNDSDmi 6352
-VROUNDSDmi_Int 6353
-VROUNDSDri 6354
-VROUNDSDri_Int 6355
-VROUNDSSmi 6356
-VROUNDSSmi_Int 6357
-VROUNDSSri 6358
-VROUNDSSri_Int 6359
-VRSQRT 6360
-VRSQRTBF 6361
-VRSQRTPHZ 6362
-VRSQRTPHZm 6363
-VRSQRTPHZmb 6364
-VRSQRTPHZmbk 6365
-VRSQRTPHZmbkz 6366
-VRSQRTPHZmk 6367
-VRSQRTPHZmkz 6368
-VRSQRTPHZr 6369
-VRSQRTPHZrk 6370
-VRSQRTPHZrkz 6371
-VRSQRTPSYm 6372
-VRSQRTPSYr 6373
-VRSQRTPSm 6374
-VRSQRTPSr 6375
-VRSQRTSHZrm 6376
-VRSQRTSHZrmk 6377
-VRSQRTSHZrmkz 6378
-VRSQRTSHZrr 6379
-VRSQRTSHZrrk 6380
-VRSQRTSHZrrkz 6381
-VRSQRTSSm 6382
-VRSQRTSSm_Int 6383
-VRSQRTSSr 6384
-VRSQRTSSr_Int 6385
-VSCALEFBF 6386
-VSCALEFPDZ 6387
-VSCALEFPDZrm 6388
-VSCALEFPDZrmb 6389
-VSCALEFPDZrmbk 6390
-VSCALEFPDZrmbkz 6391
-VSCALEFPDZrmk 6392
-VSCALEFPDZrmkz 6393
-VSCALEFPDZrr 6394
-VSCALEFPDZrrb 6395
-VSCALEFPDZrrbk 6396
-VSCALEFPDZrrbkz 6397
-VSCALEFPDZrrk 6398
-VSCALEFPDZrrkz 6399
-VSCALEFPHZ 6400
-VSCALEFPHZrm 6401
-VSCALEFPHZrmb 6402
-VSCALEFPHZrmbk 6403
-VSCALEFPHZrmbkz 6404
-VSCALEFPHZrmk 6405
-VSCALEFPHZrmkz 6406
-VSCALEFPHZrr 6407
-VSCALEFPHZrrb 6408
-VSCALEFPHZrrbk 6409
-VSCALEFPHZrrbkz 6410
-VSCALEFPHZrrk 6411
-VSCALEFPHZrrkz 6412
-VSCALEFPSZ 6413
-VSCALEFPSZrm 6414
-VSCALEFPSZrmb 6415
-VSCALEFPSZrmbk 6416
-VSCALEFPSZrmbkz 6417
-VSCALEFPSZrmk 6418
-VSCALEFPSZrmkz 6419
-VSCALEFPSZrr 6420
-VSCALEFPSZrrb 6421
-VSCALEFPSZrrbk 6422
-VSCALEFPSZrrbkz 6423
-VSCALEFPSZrrk 6424
-VSCALEFPSZrrkz 6425
-VSCALEFSDZrm 6426
-VSCALEFSDZrmk 6427
-VSCALEFSDZrmkz 6428
-VSCALEFSDZrr 6429
-VSCALEFSDZrrb_Int 6430
-VSCALEFSDZrrbk_Int 6431
-VSCALEFSDZrrbkz_Int 6432
-VSCALEFSDZrrk 6433
-VSCALEFSDZrrkz 6434
-VSCALEFSHZrm 6435
-VSCALEFSHZrmk 6436
-VSCALEFSHZrmkz 6437
-VSCALEFSHZrr 6438
-VSCALEFSHZrrb_Int 6439
-VSCALEFSHZrrbk_Int 6440
-VSCALEFSHZrrbkz_Int 6441
-VSCALEFSHZrrk 6442
-VSCALEFSHZrrkz 6443
-VSCALEFSSZrm 6444
-VSCALEFSSZrmk 6445
-VSCALEFSSZrmkz 6446
-VSCALEFSSZrr 6447
-VSCALEFSSZrrb_Int 6448
-VSCALEFSSZrrbk_Int 6449
-VSCALEFSSZrrbkz_Int 6450
-VSCALEFSSZrrk 6451
-VSCALEFSSZrrkz 6452
-VSCATTERDPDZ 6453
-VSCATTERDPDZmr 6454
-VSCATTERDPSZ 6455
-VSCATTERDPSZmr 6456
-VSCATTERPF 6457
-VSCATTERQPDZ 6458
-VSCATTERQPDZmr 6459
-VSCATTERQPSZ 6460
-VSCATTERQPSZmr 6461
-VSHA 6462
-VSHUFF 6463
-VSHUFI 6464
-VSHUFPDYrmi 6465
-VSHUFPDYrri 6466
-VSHUFPDZ 6467
-VSHUFPDZrmbi 6468
-VSHUFPDZrmbik 6469
-VSHUFPDZrmbikz 6470
-VSHUFPDZrmi 6471
-VSHUFPDZrmik 6472
-VSHUFPDZrmikz 6473
-VSHUFPDZrri 6474
-VSHUFPDZrrik 6475
-VSHUFPDZrrikz 6476
-VSHUFPDrmi 6477
-VSHUFPDrri 6478
-VSHUFPSYrmi 6479
-VSHUFPSYrri 6480
-VSHUFPSZ 6481
-VSHUFPSZrmbi 6482
-VSHUFPSZrmbik 6483
-VSHUFPSZrmbikz 6484
-VSHUFPSZrmi 6485
-VSHUFPSZrmik 6486
-VSHUFPSZrmikz 6487
-VSHUFPSZrri 6488
-VSHUFPSZrrik 6489
-VSHUFPSZrrikz 6490
-VSHUFPSrmi 6491
-VSHUFPSrri 6492
-VSM 6493
-VSQRTBF 6494
-VSQRTPDYm 6495
-VSQRTPDYr 6496
-VSQRTPDZ 6497
-VSQRTPDZm 6498
-VSQRTPDZmb 6499
-VSQRTPDZmbk 6500
-VSQRTPDZmbkz 6501
-VSQRTPDZmk 6502
-VSQRTPDZmkz 6503
-VSQRTPDZr 6504
-VSQRTPDZrb 6505
-VSQRTPDZrbk 6506
-VSQRTPDZrbkz 6507
-VSQRTPDZrk 6508
-VSQRTPDZrkz 6509
-VSQRTPDm 6510
-VSQRTPDr 6511
-VSQRTPHZ 6512
-VSQRTPHZm 6513
-VSQRTPHZmb 6514
-VSQRTPHZmbk 6515
-VSQRTPHZmbkz 6516
-VSQRTPHZmk 6517
-VSQRTPHZmkz 6518
-VSQRTPHZr 6519
-VSQRTPHZrb 6520
-VSQRTPHZrbk 6521
-VSQRTPHZrbkz 6522
-VSQRTPHZrk 6523
-VSQRTPHZrkz 6524
-VSQRTPSYm 6525
-VSQRTPSYr 6526
-VSQRTPSZ 6527
-VSQRTPSZm 6528
-VSQRTPSZmb 6529
-VSQRTPSZmbk 6530
-VSQRTPSZmbkz 6531
-VSQRTPSZmk 6532
-VSQRTPSZmkz 6533
-VSQRTPSZr 6534
-VSQRTPSZrb 6535
-VSQRTPSZrbk 6536
-VSQRTPSZrbkz 6537
-VSQRTPSZrk 6538
-VSQRTPSZrkz 6539
-VSQRTPSm 6540
-VSQRTPSr 6541
-VSQRTSDZm 6542
-VSQRTSDZm_Int 6543
-VSQRTSDZmk_Int 6544
-VSQRTSDZmkz_Int 6545
-VSQRTSDZr 6546
-VSQRTSDZr_Int 6547
-VSQRTSDZrb_Int 6548
-VSQRTSDZrbk_Int 6549
-VSQRTSDZrbkz_Int 6550
-VSQRTSDZrk_Int 6551
-VSQRTSDZrkz_Int 6552
-VSQRTSDm 6553
-VSQRTSDm_Int 6554
-VSQRTSDr 6555
-VSQRTSDr_Int 6556
-VSQRTSHZm 6557
-VSQRTSHZm_Int 6558
-VSQRTSHZmk_Int 6559
-VSQRTSHZmkz_Int 6560
-VSQRTSHZr 6561
-VSQRTSHZr_Int 6562
-VSQRTSHZrb_Int 6563
-VSQRTSHZrbk_Int 6564
-VSQRTSHZrbkz_Int 6565
-VSQRTSHZrk_Int 6566
-VSQRTSHZrkz_Int 6567
-VSQRTSSZm 6568
-VSQRTSSZm_Int 6569
-VSQRTSSZmk_Int 6570
-VSQRTSSZmkz_Int 6571
-VSQRTSSZr 6572
-VSQRTSSZr_Int 6573
-VSQRTSSZrb_Int 6574
-VSQRTSSZrbk_Int 6575
-VSQRTSSZrbkz_Int 6576
-VSQRTSSZrk_Int 6577
-VSQRTSSZrkz_Int 6578
-VSQRTSSm 6579
-VSQRTSSm_Int 6580
-VSQRTSSr 6581
-VSQRTSSr_Int 6582
-VSTMXCSR 6583
-VSUBBF 6584
-VSUBPDYrm 6585
-VSUBPDYrr 6586
-VSUBPDZ 6587
-VSUBPDZrm 6588
-VSUBPDZrmb 6589
-VSUBPDZrmbk 6590
-VSUBPDZrmbkz 6591
-VSUBPDZrmk 6592
-VSUBPDZrmkz 6593
-VSUBPDZrr 6594
-VSUBPDZrrb 6595
-VSUBPDZrrbk 6596
-VSUBPDZrrbkz 6597
-VSUBPDZrrk 6598
-VSUBPDZrrkz 6599
-VSUBPDrm 6600
-VSUBPDrr 6601
-VSUBPHZ 6602
-VSUBPHZrm 6603
-VSUBPHZrmb 6604
-VSUBPHZrmbk 6605
-VSUBPHZrmbkz 6606
-VSUBPHZrmk 6607
-VSUBPHZrmkz 6608
-VSUBPHZrr 6609
-VSUBPHZrrb 6610
-VSUBPHZrrbk 6611
-VSUBPHZrrbkz 6612
-VSUBPHZrrk 6613
-VSUBPHZrrkz 6614
-VSUBPSYrm 6615
-VSUBPSYrr 6616
-VSUBPSZ 6617
-VSUBPSZrm 6618
-VSUBPSZrmb 6619
-VSUBPSZrmbk 6620
-VSUBPSZrmbkz 6621
-VSUBPSZrmk 6622
-VSUBPSZrmkz 6623
-VSUBPSZrr 6624
-VSUBPSZrrb 6625
-VSUBPSZrrbk 6626
-VSUBPSZrrbkz 6627
-VSUBPSZrrk 6628
-VSUBPSZrrkz 6629
-VSUBPSrm 6630
-VSUBPSrr 6631
-VSUBSDZrm 6632
-VSUBSDZrm_Int 6633
-VSUBSDZrmk_Int 6634
-VSUBSDZrmkz_Int 6635
-VSUBSDZrr 6636
-VSUBSDZrr_Int 6637
-VSUBSDZrrb_Int 6638
-VSUBSDZrrbk_Int 6639
-VSUBSDZrrbkz_Int 6640
-VSUBSDZrrk_Int 6641
-VSUBSDZrrkz_Int 6642
-VSUBSDrm 6643
-VSUBSDrm_Int 6644
-VSUBSDrr 6645
-VSUBSDrr_Int 6646
-VSUBSHZrm 6647
-VSUBSHZrm_Int 6648
-VSUBSHZrmk_Int 6649
-VSUBSHZrmkz_Int 6650
-VSUBSHZrr 6651
-VSUBSHZrr_Int 6652
-VSUBSHZrrb_Int 6653
-VSUBSHZrrbk_Int 6654
-VSUBSHZrrbkz_Int 6655
-VSUBSHZrrk_Int 6656
-VSUBSHZrrkz_Int 6657
-VSUBSSZrm 6658
-VSUBSSZrm_Int 6659
-VSUBSSZrmk_Int 6660
-VSUBSSZrmkz_Int 6661
-VSUBSSZrr 6662
-VSUBSSZrr_Int 6663
-VSUBSSZrrb_Int 6664
-VSUBSSZrrbk_Int 6665
-VSUBSSZrrbkz_Int 6666
-VSUBSSZrrk_Int 6667
-VSUBSSZrrkz_Int 6668
-VSUBSSrm 6669
-VSUBSSrm_Int 6670
-VSUBSSrr 6671
-VSUBSSrr_Int 6672
-VTESTPDYrm 6673
-VTESTPDYrr 6674
-VTESTPDrm 6675
-VTESTPDrr 6676
-VTESTPSYrm 6677
-VTESTPSYrr 6678
-VTESTPSrm 6679
-VTESTPSrr 6680
-VUCOMISDZrm 6681
-VUCOMISDZrm_Int 6682
-VUCOMISDZrr 6683
-VUCOMISDZrr_Int 6684
-VUCOMISDZrrb 6685
-VUCOMISDrm 6686
-VUCOMISDrm_Int 6687
-VUCOMISDrr 6688
-VUCOMISDrr_Int 6689
-VUCOMISHZrm 6690
-VUCOMISHZrm_Int 6691
-VUCOMISHZrr 6692
-VUCOMISHZrr_Int 6693
-VUCOMISHZrrb 6694
-VUCOMISSZrm 6695
-VUCOMISSZrm_Int 6696
-VUCOMISSZrr 6697
-VUCOMISSZrr_Int 6698
-VUCOMISSZrrb 6699
-VUCOMISSrm 6700
-VUCOMISSrm_Int 6701
-VUCOMISSrr 6702
-VUCOMISSrr_Int 6703
-VUCOMXSDZrm 6704
-VUCOMXSDZrm_Int 6705
-VUCOMXSDZrr 6706
-VUCOMXSDZrr_Int 6707
-VUCOMXSDZrrb_Int 6708
-VUCOMXSHZrm 6709
-VUCOMXSHZrm_Int 6710
-VUCOMXSHZrr 6711
-VUCOMXSHZrr_Int 6712
-VUCOMXSHZrrb_Int 6713
-VUCOMXSSZrm 6714
-VUCOMXSSZrm_Int 6715
-VUCOMXSSZrr 6716
-VUCOMXSSZrr_Int 6717
-VUCOMXSSZrrb_Int 6718
-VUNPCKHPDYrm 6719
-VUNPCKHPDYrr 6720
-VUNPCKHPDZ 6721
-VUNPCKHPDZrm 6722
-VUNPCKHPDZrmb 6723
-VUNPCKHPDZrmbk 6724
-VUNPCKHPDZrmbkz 6725
-VUNPCKHPDZrmk 6726
-VUNPCKHPDZrmkz 6727
-VUNPCKHPDZrr 6728
-VUNPCKHPDZrrk 6729
-VUNPCKHPDZrrkz 6730
-VUNPCKHPDrm 6731
-VUNPCKHPDrr 6732
-VUNPCKHPSYrm 6733
-VUNPCKHPSYrr 6734
-VUNPCKHPSZ 6735
-VUNPCKHPSZrm 6736
-VUNPCKHPSZrmb 6737
-VUNPCKHPSZrmbk 6738
-VUNPCKHPSZrmbkz 6739
-VUNPCKHPSZrmk 6740
-VUNPCKHPSZrmkz 6741
-VUNPCKHPSZrr 6742
-VUNPCKHPSZrrk 6743
-VUNPCKHPSZrrkz 6744
-VUNPCKHPSrm 6745
-VUNPCKHPSrr 6746
-VUNPCKLPDYrm 6747
-VUNPCKLPDYrr 6748
-VUNPCKLPDZ 6749
-VUNPCKLPDZrm 6750
-VUNPCKLPDZrmb 6751
-VUNPCKLPDZrmbk 6752
-VUNPCKLPDZrmbkz 6753
-VUNPCKLPDZrmk 6754
-VUNPCKLPDZrmkz 6755
-VUNPCKLPDZrr 6756
-VUNPCKLPDZrrk 6757
-VUNPCKLPDZrrkz 6758
-VUNPCKLPDrm 6759
-VUNPCKLPDrr 6760
-VUNPCKLPSYrm 6761
-VUNPCKLPSYrr 6762
-VUNPCKLPSZ 6763
-VUNPCKLPSZrm 6764
-VUNPCKLPSZrmb 6765
-VUNPCKLPSZrmbk 6766
-VUNPCKLPSZrmbkz 6767
-VUNPCKLPSZrmk 6768
-VUNPCKLPSZrmkz 6769
-VUNPCKLPSZrr 6770
-VUNPCKLPSZrrk 6771
-VUNPCKLPSZrrkz 6772
-VUNPCKLPSrm 6773
-VUNPCKLPSrr 6774
-VXORPDYrm 6775
-VXORPDYrr 6776
-VXORPDZ 6777
-VXORPDZrm 6778
-VXORPDZrmb 6779
-VXORPDZrmbk 6780
-VXORPDZrmbkz 6781
-VXORPDZrmk 6782
-VXORPDZrmkz 6783
-VXORPDZrr 6784
-VXORPDZrrk 6785
-VXORPDZrrkz 6786
-VXORPDrm 6787
-VXORPDrr 6788
-VXORPSYrm 6789
-VXORPSYrr 6790
-VXORPSZ 6791
-VXORPSZrm 6792
-VXORPSZrmb 6793
-VXORPSZrmbk 6794
-VXORPSZrmbkz 6795
-VXORPSZrmk 6796
-VXORPSZrmkz 6797
-VXORPSZrr 6798
-VXORPSZrrk 6799
-VXORPSZrrkz 6800
-VXORPSrm 6801
-VXORPSrr 6802
-VZEROALL 6803
-VZEROUPPER 6804
-V_SET 6805
-V_SETALLONES 6806
-WAIT 6807
-WBINVD 6808
-WBNOINVD 6809
-WRFLAGS 6810
-WRFSBASE 6811
-WRGSBASE 6812
-WRMSR 6813
-WRMSRLIST 6814
-WRMSRNS 6815
-WRMSRNSir 6816
-WRMSRNSir_EVEX 6817
-WRPKRUr 6818
-WRSSD 6819
-WRSSD_EVEX 6820
-WRSSQ 6821
-WRSSQ_EVEX 6822
-WRUSSD 6823
-WRUSSD_EVEX 6824
-WRUSSQ 6825
-WRUSSQ_EVEX 6826
-XABORT 6827
-XABORT_DEF 6828
-XACQUIRE_PREFIX 6829
-XADD 6830
-XAM_F 6831
-XAM_Fp 6832
-XBEGIN 6833
-XCHG 6834
-XCH_F 6835
-XCRYPTCBC 6836
-XCRYPTCFB 6837
-XCRYPTCTR 6838
-XCRYPTECB 6839
-XCRYPTOFB 6840
-XEND 6841
-XGETBV 6842
-XLAT 6843
-XOR 6844
-XORPDrm 6845
-XORPDrr 6846
-XORPSrm 6847
-XORPSrr 6848
-XRELEASE_PREFIX 6849
-XRESLDTRK 6850
-XRSTOR 6851
-XRSTORS 6852
-XSAVE 6853
-XSAVEC 6854
-XSAVEOPT 6855
-XSAVES 6856
-XSETBV 6857
-XSHA 6858
-XSTORE 6859
-XSUSLDTRK 6860
-XTEST 6861
-Immediate 6862
-CImmediate 6863
-FPImmediate 6864
-MBB 6865
-FrameIndex 6866
-ConstantPoolIndex 6867
-TargetIndex 6868
-JumpTableIndex 6869
-ExternalSymbol 6870
-GlobalAddress 6871
-BlockAddress 6872
-RegisterMask 6873
-RegisterLiveOut 6874
-Metadata 6875
-MCSymbol 6876
-CFIIndex 6877
-IntrinsicID 6878
-Predicate 6879
-ShuffleMask 6880
-PhyReg_GR8 6881
-PhyReg_GRH8 6882
-PhyReg_GR8_NOREX2 6883
-PhyReg_GR8_NOREX 6884
-PhyReg_GR8_ABCD_H 6885
-PhyReg_GR8_ABCD_L 6886
-PhyReg_GRH16 6887
-PhyReg_GR16 6888
-PhyReg_GR16_NOREX2 6889
-PhyReg_GR16_NOREX 6890
-PhyReg_VK1 6891
-PhyReg_VK16 6892
-PhyReg_VK2 6893
-PhyReg_VK4 6894
-PhyReg_VK8 6895
-PhyReg_VK16WM 6896
-PhyReg_VK1WM 6897
-PhyReg_VK2WM 6898
-PhyReg_VK4WM 6899
-PhyReg_VK8WM 6900
-PhyReg_SEGMENT_REG 6901
-PhyReg_GR16_ABCD 6902
-PhyReg_FPCCR 6903
-PhyReg_FR16X 6904
-PhyReg_FR16 6905
-PhyReg_VK16PAIR 6906
-PhyReg_VK1PAIR 6907
-PhyReg_VK2PAIR 6908
-PhyReg_VK4PAIR 6909
-PhyReg_VK8PAIR 6910
-PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6911
-PhyReg_LOW32_ADDR_ACCESS_RBP 6912
-PhyReg_LOW32_ADDR_ACCESS 6913
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6914
-PhyReg_FR32X 6915
-PhyReg_GR32 6916
-PhyReg_GR32_NOSP 6917
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6918
-PhyReg_DEBUG_REG 6919
-PhyReg_FR32 6920
-PhyReg_GR32_NOREX2 6921
-PhyReg_GR32_NOREX2_NOSP 6922
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6923
-PhyReg_GR32_NOREX 6924
-PhyReg_VK32 6925
-PhyReg_GR32_NOREX_NOSP 6926
-PhyReg_RFP32 6927
-PhyReg_VK32WM 6928
-PhyReg_GR32_ABCD 6929
-PhyReg_GR32_TC 6930
-PhyReg_GR32_ABCD_and_GR32_TC 6931
-PhyReg_GR32_AD 6932
-PhyReg_GR32_ArgRef 6933
-PhyReg_GR32_BPSP 6934
-PhyReg_GR32_BSI 6935
-PhyReg_GR32_CB 6936
-PhyReg_GR32_DC 6937
-PhyReg_GR32_DIBP 6938
-PhyReg_GR32_SIDI 6939
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6940
-PhyReg_CCR 6941
-PhyReg_DFCCR 6942
-PhyReg_GR32_ABCD_and_GR32_BSI 6943
-PhyReg_GR32_AD_and_GR32_ArgRef 6944
-PhyReg_GR32_ArgRef_and_GR32_CB 6945
-PhyReg_GR32_BPSP_and_GR32_DIBP 6946
-PhyReg_GR32_BPSP_and_GR32_TC 6947
-PhyReg_GR32_BSI_and_GR32_SIDI 6948
-PhyReg_GR32_DIBP_and_GR32_SIDI 6949
-PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6950
-PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6951
-PhyReg_RFP64 6952
-PhyReg_GR64 6953
-PhyReg_FR64X 6954
-PhyReg_GR64_with_sub_8bit 6955
-PhyReg_GR64_NOSP 6956
-PhyReg_GR64_NOREX2 6957
-PhyReg_CONTROL_REG 6958
-PhyReg_FR64 6959
-PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6960
-PhyReg_GR64_NOREX2_NOSP 6961
-PhyReg_GR64PLTSafe 6962
-PhyReg_GR64_TC 6963
-PhyReg_GR64_NOREX 6964
-PhyReg_GR64_TCW64 6965
-PhyReg_GR64_TC_with_sub_8bit 6966
-PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6967
-PhyReg_GR64_TCW64_with_sub_8bit 6968
-PhyReg_GR64_TC_and_GR64_TCW64 6969
-PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6970
-PhyReg_VK64 6971
-PhyReg_VR64 6972
-PhyReg_GR64PLTSafe_and_GR64_TC 6973
-PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6974
-PhyReg_GR64_NOREX_NOSP 6975
-PhyReg_GR64_NOREX_and_GR64_TC 6976
-PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6977
-PhyReg_VK64WM 6978
-PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6979
-PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 6980
-PhyReg_GR64PLTSafe_and_GR64_TCW64 6981
-PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 6982
-PhyReg_GR64_NOREX_and_GR64_TCW64 6983
-PhyReg_GR64_ABCD 6984
-PhyReg_GR64_with_sub_32bit_in_GR32_TC 6985
-PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 6986
-PhyReg_GR64_AD 6987
-PhyReg_GR64_ArgRef 6988
-PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 6989
-PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 6990
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 6991
-PhyReg_GR64_with_sub_32bit_in_GR32_BSI 6992
-PhyReg_GR64_with_sub_32bit_in_GR32_CB 6993
-PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 6994
-PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 6995
-PhyReg_GR64_A 6996
-PhyReg_GR64_ArgRef_and_GR64_TC 6997
-PhyReg_GR64_and_LOW32_ADDR_ACCESS 6998
-PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 6999
-PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7000
-PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7001
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7002
-PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7003
-PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7004
-PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7005
-PhyReg_RST 7006
-PhyReg_RFP80 7007
-PhyReg_RFP80_7 7008
-PhyReg_VR128X 7009
-PhyReg_VR128 7010
-PhyReg_VR256X 7011
-PhyReg_VR256 7012
-PhyReg_VR512 7013
-PhyReg_VR512_0_15 7014
-PhyReg_TILE 7015
-VirtReg_GR8 7016
-VirtReg_GRH8 7017
-VirtReg_GR8_NOREX2 7018
-VirtReg_GR8_NOREX 7019
-VirtReg_GR8_ABCD_H 7020
-VirtReg_GR8_ABCD_L 7021
-VirtReg_GRH16 7022
-VirtReg_GR16 7023
-VirtReg_GR16_NOREX2 7024
-VirtReg_GR16_NOREX 7025
-VirtReg_VK1 7026
-VirtReg_VK16 7027
-VirtReg_VK2 7028
-VirtReg_VK4 7029
-VirtReg_VK8 7030
-VirtReg_VK16WM 7031
-VirtReg_VK1WM 7032
-VirtReg_VK2WM 7033
-VirtReg_VK4WM 7034
-VirtReg_VK8WM 7035
-VirtReg_SEGMENT_REG 7036
-VirtReg_GR16_ABCD 7037
-VirtReg_FPCCR 7038
-VirtReg_FR16X 7039
-VirtReg_FR16 7040
-VirtReg_VK16PAIR 7041
-VirtReg_VK1PAIR 7042
-VirtReg_VK2PAIR 7043
-VirtReg_VK4PAIR 7044
-VirtReg_VK8PAIR 7045
-VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7046
-VirtReg_LOW32_ADDR_ACCESS_RBP 7047
-VirtReg_LOW32_ADDR_ACCESS 7048
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7049
-VirtReg_FR32X 7050
-VirtReg_GR32 7051
-VirtReg_GR32_NOSP 7052
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7053
-VirtReg_DEBUG_REG 7054
-VirtReg_FR32 7055
-VirtReg_GR32_NOREX2 7056
-VirtReg_GR32_NOREX2_NOSP 7057
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7058
-VirtReg_GR32_NOREX 7059
-VirtReg_VK32 7060
-VirtReg_GR32_NOREX_NOSP 7061
-VirtReg_RFP32 7062
-VirtReg_VK32WM 7063
-VirtReg_GR32_ABCD 7064
-VirtReg_GR32_TC 7065
-VirtReg_GR32_ABCD_and_GR32_TC 7066
-VirtReg_GR32_AD 7067
-VirtReg_GR32_ArgRef 7068
-VirtReg_GR32_BPSP 7069
-VirtReg_GR32_BSI 7070
-VirtReg_GR32_CB 7071
-VirtReg_GR32_DC 7072
-VirtReg_GR32_DIBP 7073
-VirtReg_GR32_SIDI 7074
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7075
-VirtReg_CCR 7076
-VirtReg_DFCCR 7077
-VirtReg_GR32_ABCD_and_GR32_BSI 7078
-VirtReg_GR32_AD_and_GR32_ArgRef 7079
-VirtReg_GR32_ArgRef_and_GR32_CB 7080
-VirtReg_GR32_BPSP_and_GR32_DIBP 7081
-VirtReg_GR32_BPSP_and_GR32_TC 7082
-VirtReg_GR32_BSI_and_GR32_SIDI 7083
-VirtReg_GR32_DIBP_and_GR32_SIDI 7084
-VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7085
-VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7086
-VirtReg_RFP64 7087
-VirtReg_GR64 7088
-VirtReg_FR64X 7089
-VirtReg_GR64_with_sub_8bit 7090
-VirtReg_GR64_NOSP 7091
-VirtReg_GR64_NOREX2 7092
-VirtReg_CONTROL_REG 7093
-VirtReg_FR64 7094
-VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7095
-VirtReg_GR64_NOREX2_NOSP 7096
-VirtReg_GR64PLTSafe 7097
-VirtReg_GR64_TC 7098
-VirtReg_GR64_NOREX 7099
-VirtReg_GR64_TCW64 7100
-VirtReg_GR64_TC_with_sub_8bit 7101
-VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7102
-VirtReg_GR64_TCW64_with_sub_8bit 7103
-VirtReg_GR64_TC_and_GR64_TCW64 7104
-VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7105
-VirtReg_VK64 7106
-VirtReg_VR64 7107
-VirtReg_GR64PLTSafe_and_GR64_TC 7108
-VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7109
-VirtReg_GR64_NOREX_NOSP 7110
-VirtReg_GR64_NOREX_and_GR64_TC 7111
-VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7112
-VirtReg_VK64WM 7113
-VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7114
-VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7115
-VirtReg_GR64PLTSafe_and_GR64_TCW64 7116
-VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7117
-VirtReg_GR64_NOREX_and_GR64_TCW64 7118
-VirtReg_GR64_ABCD 7119
-VirtReg_GR64_with_sub_32bit_in_GR32_TC 7120
-VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7121
-VirtReg_GR64_AD 7122
-VirtReg_GR64_ArgRef 7123
-VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7124
-VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7125
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7126
-VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7127
-VirtReg_GR64_with_sub_32bit_in_GR32_CB 7128
-VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7129
-VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7130
-VirtReg_GR64_A 7131
-VirtReg_GR64_ArgRef_and_GR64_TC 7132
-VirtReg_GR64_and_LOW32_ADDR_ACCESS 7133
-VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7134
-VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7135
-VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7136
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7137
-VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7138
-VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7139
-VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7140
-VirtReg_RST 7141
-VirtReg_RFP80 7142
-VirtReg_RFP80_7 7143
-VirtReg_VR128X 7144
-VirtReg_VR128 7145
-VirtReg_VR256X 7146
-VirtReg_VR256 7147
-VirtReg_VR512 7148
-VirtReg_VR512_0_15 7149
-VirtReg_TILE 7150
+RELOC_NONE 1533
+REPNE_PREFIX 1534
+REP_MOVSB 1535
+REP_MOVSD 1536
+REP_MOVSQ 1537
+REP_MOVSW 1538
+REP_PREFIX 1539
+REP_STOSB 1540
+REP_STOSD 1541
+REP_STOSQ 1542
+REP_STOSW 1543
+RET 1544
+RETI 1545
+REX 1546
+RMPADJUST 1547
+RMPQUERY 1548
+RMPUPDATE 1549
+ROL 1550
+ROR 1551
+RORX 1552
+ROUNDPDmi 1553
+ROUNDPDri 1554
+ROUNDPSmi 1555
+ROUNDPSri 1556
+ROUNDSDmi 1557
+ROUNDSDmi_Int 1558
+ROUNDSDri 1559
+ROUNDSDri_Int 1560
+ROUNDSSmi 1561
+ROUNDSSmi_Int 1562
+ROUNDSSri 1563
+ROUNDSSri_Int 1564
+RSM 1565
+RSQRTPSm 1566
+RSQRTPSr 1567
+RSQRTSSm 1568
+RSQRTSSm_Int 1569
+RSQRTSSr 1570
+RSQRTSSr_Int 1571
+RSTORSSP 1572
+SAHF 1573
+SALC 1574
+SAR 1575
+SARX 1576
+SAVEPREVSSP 1577
+SBB 1578
+SCASB 1579
+SCASL 1580
+SCASQ 1581
+SCASW 1582
+SEAMCALL 1583
+SEAMOPS 1584
+SEAMRET 1585
+SEG_ALLOCA 1586
+SEH_BeginEpilogue 1587
+SEH_EndEpilogue 1588
+SEH_EndPrologue 1589
+SEH_PushFrame 1590
+SEH_PushReg 1591
+SEH_SaveReg 1592
+SEH_SaveXMM 1593
+SEH_SetFrame 1594
+SEH_StackAlign 1595
+SEH_StackAlloc 1596
+SEH_UnwindV 1597
+SEH_UnwindVersion 1598
+SENDUIPI 1599
+SERIALIZE 1600
+SETB_C 1601
+SETCCm 1602
+SETCCm_EVEX 1603
+SETCCr 1604
+SETCCr_EVEX 1605
+SETSSBSY 1606
+SETZUCCm 1607
+SETZUCCr 1608
+SFENCE 1609
+SGDT 1610
+SHA 1611
+SHL 1612
+SHLD 1613
+SHLDROT 1614
+SHLX 1615
+SHR 1616
+SHRD 1617
+SHRDROT 1618
+SHRX 1619
+SHUFPDrmi 1620
+SHUFPDrri 1621
+SHUFPSrmi 1622
+SHUFPSrri 1623
+SIDT 1624
+SKINIT 1625
+SLDT 1626
+SLWPCB 1627
+SMSW 1628
+SQRTPDm 1629
+SQRTPDr 1630
+SQRTPSm 1631
+SQRTPSr 1632
+SQRTSDm 1633
+SQRTSDm_Int 1634
+SQRTSDr 1635
+SQRTSDr_Int 1636
+SQRTSSm 1637
+SQRTSSm_Int 1638
+SQRTSSr 1639
+SQRTSSr_Int 1640
+SQRT_F 1641
+SQRT_Fp 1642
+SS_PREFIX 1643
+STAC 1644
+STACKALLOC_W_PROBING 1645
+STACKMAP 1646
+STATEPOINT 1647
+STC 1648
+STD 1649
+STGI 1650
+STI 1651
+STMXCSR 1652
+STOSB 1653
+STOSL 1654
+STOSQ 1655
+STOSW 1656
+STR 1657
+STRm 1658
+STTILECFG 1659
+STTILECFG_EVEX 1660
+STUI 1661
+ST_F 1662
+ST_FP 1663
+ST_FPrr 1664
+ST_Fp 1665
+ST_FpP 1666
+ST_Frr 1667
+SUB 1668
+SUBPDrm 1669
+SUBPDrr 1670
+SUBPSrm 1671
+SUBPSrr 1672
+SUBREG_TO_REG 1673
+SUBR_F 1674
+SUBR_FI 1675
+SUBR_FPrST 1676
+SUBR_FST 1677
+SUBR_Fp 1678
+SUBR_FpI 1679
+SUBR_FrST 1680
+SUBSDrm 1681
+SUBSDrm_Int 1682
+SUBSDrr 1683
+SUBSDrr_Int 1684
+SUBSSrm 1685
+SUBSSrm_Int 1686
+SUBSSrr 1687
+SUBSSrr_Int 1688
+SUB_F 1689
+SUB_FI 1690
+SUB_FPrST 1691
+SUB_FST 1692
+SUB_Fp 1693
+SUB_FpI 1694
+SUB_FrST 1695
+SWAPGS 1696
+SYSCALL 1697
+SYSENTER 1698
+SYSEXIT 1699
+SYSRET 1700
+T 1701
+TAILJMPd 1702
+TAILJMPd_CC 1703
+TAILJMPm 1704
+TAILJMPr 1705
+TCMMIMFP 1706
+TCMMRLFP 1707
+TCRETURN_HIPE 1708
+TCRETURN_WIN 1709
+TCRETURN_WINmi 1710
+TCRETURNdi 1711
+TCRETURNdicc 1712
+TCRETURNmi 1713
+TCRETURNri 1714
+TCVTROWD 1715
+TCVTROWPS 1716
+TDCALL 1717
+TDPBF 1718
+TDPBHF 1719
+TDPBSSD 1720
+TDPBSUD 1721
+TDPBUSD 1722
+TDPBUUD 1723
+TDPFP 1724
+TDPHBF 1725
+TDPHF 1726
+TEST 1727
+TESTUI 1728
+TILELOADD 1729
+TILELOADDRS 1730
+TILELOADDRST 1731
+TILELOADDRS_EVEX 1732
+TILELOADDT 1733
+TILELOADD_EVEX 1734
+TILEMOVROWrre 1735
+TILEMOVROWrri 1736
+TILERELEASE 1737
+TILESTORED 1738
+TILESTORED_EVEX 1739
+TILEZERO 1740
+TLBSYNC 1741
+TLSCall 1742
+TLS_addr 1743
+TLS_addrX 1744
+TLS_base_addr 1745
+TLS_base_addrX 1746
+TLS_desc 1747
+TMMULTF 1748
+TPAUSE 1749
+TRAP 1750
+TST_F 1751
+TST_Fp 1752
+TZCNT 1753
+TZMSK 1754
+UBSAN_UD 1755
+UCOMISDrm 1756
+UCOMISDrm_Int 1757
+UCOMISDrr 1758
+UCOMISDrr_Int 1759
+UCOMISSrm 1760
+UCOMISSrm_Int 1761
+UCOMISSrr 1762
+UCOMISSrr_Int 1763
+UCOM_FIPr 1764
+UCOM_FIr 1765
+UCOM_FPPr 1766
+UCOM_FPr 1767
+UCOM_FpIr 1768
+UCOM_Fpr 1769
+UCOM_Fr 1770
+UD 1771
+UIRET 1772
+UMONITOR 1773
+UMWAIT 1774
+UNPCKHPDrm 1775
+UNPCKHPDrr 1776
+UNPCKHPSrm 1777
+UNPCKHPSrr 1778
+UNPCKLPDrm 1779
+UNPCKLPDrr 1780
+UNPCKLPSrm 1781
+UNPCKLPSrr 1782
+URDMSRri 1783
+URDMSRri_EVEX 1784
+URDMSRrr 1785
+URDMSRrr_EVEX 1786
+UWRMSRir 1787
+UWRMSRir_EVEX 1788
+UWRMSRrr 1789
+UWRMSRrr_EVEX 1790
+V 1791
+VAARG 1792
+VAARG_X 1793
+VADDBF 1794
+VADDPDYrm 1795
+VADDPDYrr 1796
+VADDPDZ 1797
+VADDPDZrm 1798
+VADDPDZrmb 1799
+VADDPDZrmbk 1800
+VADDPDZrmbkz 1801
+VADDPDZrmk 1802
+VADDPDZrmkz 1803
+VADDPDZrr 1804
+VADDPDZrrb 1805
+VADDPDZrrbk 1806
+VADDPDZrrbkz 1807
+VADDPDZrrk 1808
+VADDPDZrrkz 1809
+VADDPDrm 1810
+VADDPDrr 1811
+VADDPHZ 1812
+VADDPHZrm 1813
+VADDPHZrmb 1814
+VADDPHZrmbk 1815
+VADDPHZrmbkz 1816
+VADDPHZrmk 1817
+VADDPHZrmkz 1818
+VADDPHZrr 1819
+VADDPHZrrb 1820
+VADDPHZrrbk 1821
+VADDPHZrrbkz 1822
+VADDPHZrrk 1823
+VADDPHZrrkz 1824
+VADDPSYrm 1825
+VADDPSYrr 1826
+VADDPSZ 1827
+VADDPSZrm 1828
+VADDPSZrmb 1829
+VADDPSZrmbk 1830
+VADDPSZrmbkz 1831
+VADDPSZrmk 1832
+VADDPSZrmkz 1833
+VADDPSZrr 1834
+VADDPSZrrb 1835
+VADDPSZrrbk 1836
+VADDPSZrrbkz 1837
+VADDPSZrrk 1838
+VADDPSZrrkz 1839
+VADDPSrm 1840
+VADDPSrr 1841
+VADDSDZrm 1842
+VADDSDZrm_Int 1843
+VADDSDZrmk_Int 1844
+VADDSDZrmkz_Int 1845
+VADDSDZrr 1846
+VADDSDZrr_Int 1847
+VADDSDZrrb_Int 1848
+VADDSDZrrbk_Int 1849
+VADDSDZrrbkz_Int 1850
+VADDSDZrrk_Int 1851
+VADDSDZrrkz_Int 1852
+VADDSDrm 1853
+VADDSDrm_Int 1854
+VADDSDrr 1855
+VADDSDrr_Int 1856
+VADDSHZrm 1857
+VADDSHZrm_Int 1858
+VADDSHZrmk_Int 1859
+VADDSHZrmkz_Int 1860
+VADDSHZrr 1861
+VADDSHZrr_Int 1862
+VADDSHZrrb_Int 1863
+VADDSHZrrbk_Int 1864
+VADDSHZrrbkz_Int 1865
+VADDSHZrrk_Int 1866
+VADDSHZrrkz_Int 1867
+VADDSSZrm 1868
+VADDSSZrm_Int 1869
+VADDSSZrmk_Int 1870
+VADDSSZrmkz_Int 1871
+VADDSSZrr 1872
+VADDSSZrr_Int 1873
+VADDSSZrrb_Int 1874
+VADDSSZrrbk_Int 1875
+VADDSSZrrbkz_Int 1876
+VADDSSZrrk_Int 1877
+VADDSSZrrkz_Int 1878
+VADDSSrm 1879
+VADDSSrm_Int 1880
+VADDSSrr 1881
+VADDSSrr_Int 1882
+VADDSUBPDYrm 1883
+VADDSUBPDYrr 1884
+VADDSUBPDrm 1885
+VADDSUBPDrr 1886
+VADDSUBPSYrm 1887
+VADDSUBPSYrr 1888
+VADDSUBPSrm 1889
+VADDSUBPSrr 1890
+VAESDECLASTYrm 1891
+VAESDECLASTYrr 1892
+VAESDECLASTZ 1893
+VAESDECLASTZrm 1894
+VAESDECLASTZrr 1895
+VAESDECLASTrm 1896
+VAESDECLASTrr 1897
+VAESDECYrm 1898
+VAESDECYrr 1899
+VAESDECZ 1900
+VAESDECZrm 1901
+VAESDECZrr 1902
+VAESDECrm 1903
+VAESDECrr 1904
+VAESENCLASTYrm 1905
+VAESENCLASTYrr 1906
+VAESENCLASTZ 1907
+VAESENCLASTZrm 1908
+VAESENCLASTZrr 1909
+VAESENCLASTrm 1910
+VAESENCLASTrr 1911
+VAESENCYrm 1912
+VAESENCYrr 1913
+VAESENCZ 1914
+VAESENCZrm 1915
+VAESENCZrr 1916
+VAESENCrm 1917
+VAESENCrr 1918
+VAESIMCrm 1919
+VAESIMCrr 1920
+VAESKEYGENASSISTrmi 1921
+VAESKEYGENASSISTrri 1922
+VALIGNDZ 1923
+VALIGNDZrmbi 1924
+VALIGNDZrmbik 1925
+VALIGNDZrmbikz 1926
+VALIGNDZrmi 1927
+VALIGNDZrmik 1928
+VALIGNDZrmikz 1929
+VALIGNDZrri 1930
+VALIGNDZrrik 1931
+VALIGNDZrrikz 1932
+VALIGNQZ 1933
+VALIGNQZrmbi 1934
+VALIGNQZrmbik 1935
+VALIGNQZrmbikz 1936
+VALIGNQZrmi 1937
+VALIGNQZrmik 1938
+VALIGNQZrmikz 1939
+VALIGNQZrri 1940
+VALIGNQZrrik 1941
+VALIGNQZrrikz 1942
+VANDNPDYrm 1943
+VANDNPDYrr 1944
+VANDNPDZ 1945
+VANDNPDZrm 1946
+VANDNPDZrmb 1947
+VANDNPDZrmbk 1948
+VANDNPDZrmbkz 1949
+VANDNPDZrmk 1950
+VANDNPDZrmkz 1951
+VANDNPDZrr 1952
+VANDNPDZrrk 1953
+VANDNPDZrrkz 1954
+VANDNPDrm 1955
+VANDNPDrr 1956
+VANDNPSYrm 1957
+VANDNPSYrr 1958
+VANDNPSZ 1959
+VANDNPSZrm 1960
+VANDNPSZrmb 1961
+VANDNPSZrmbk 1962
+VANDNPSZrmbkz 1963
+VANDNPSZrmk 1964
+VANDNPSZrmkz 1965
+VANDNPSZrr 1966
+VANDNPSZrrk 1967
+VANDNPSZrrkz 1968
+VANDNPSrm 1969
+VANDNPSrr 1970
+VANDPDYrm 1971
+VANDPDYrr 1972
+VANDPDZ 1973
+VANDPDZrm 1974
+VANDPDZrmb 1975
+VANDPDZrmbk 1976
+VANDPDZrmbkz 1977
+VANDPDZrmk 1978
+VANDPDZrmkz 1979
+VANDPDZrr 1980
+VANDPDZrrk 1981
+VANDPDZrrkz 1982
+VANDPDrm 1983
+VANDPDrr 1984
+VANDPSYrm 1985
+VANDPSYrr 1986
+VANDPSZ 1987
+VANDPSZrm 1988
+VANDPSZrmb 1989
+VANDPSZrmbk 1990
+VANDPSZrmbkz 1991
+VANDPSZrmk 1992
+VANDPSZrmkz 1993
+VANDPSZrr 1994
+VANDPSZrrk 1995
+VANDPSZrrkz 1996
+VANDPSrm 1997
+VANDPSrr 1998
+VASTART_SAVE_XMM_REGS 1999
+VBCSTNEBF 2000
+VBCSTNESH 2001
+VBLENDMPDZ 2002
+VBLENDMPDZrm 2003
+VBLENDMPDZrmb 2004
+VBLENDMPDZrmbk 2005
+VBLENDMPDZrmbkz 2006
+VBLENDMPDZrmk 2007
+VBLENDMPDZrmkz 2008
+VBLENDMPDZrr 2009
+VBLENDMPDZrrk 2010
+VBLENDMPDZrrkz 2011
+VBLENDMPSZ 2012
+VBLENDMPSZrm 2013
+VBLENDMPSZrmb 2014
+VBLENDMPSZrmbk 2015
+VBLENDMPSZrmbkz 2016
+VBLENDMPSZrmk 2017
+VBLENDMPSZrmkz 2018
+VBLENDMPSZrr 2019
+VBLENDMPSZrrk 2020
+VBLENDMPSZrrkz 2021
+VBLENDPDYrmi 2022
+VBLENDPDYrri 2023
+VBLENDPDrmi 2024
+VBLENDPDrri 2025
+VBLENDPSYrmi 2026
+VBLENDPSYrri 2027
+VBLENDPSrmi 2028
+VBLENDPSrri 2029
+VBLENDVPDYrmr 2030
+VBLENDVPDYrrr 2031
+VBLENDVPDrmr 2032
+VBLENDVPDrrr 2033
+VBLENDVPSYrmr 2034
+VBLENDVPSYrrr 2035
+VBLENDVPSrmr 2036
+VBLENDVPSrrr 2037
+VBROADCASTF 2038
+VBROADCASTI 2039
+VBROADCASTSDYrm 2040
+VBROADCASTSDYrr 2041
+VBROADCASTSDZ 2042
+VBROADCASTSDZrm 2043
+VBROADCASTSDZrmk 2044
+VBROADCASTSDZrmkz 2045
+VBROADCASTSDZrr 2046
+VBROADCASTSDZrrk 2047
+VBROADCASTSDZrrkz 2048
+VBROADCASTSSYrm 2049
+VBROADCASTSSYrr 2050
+VBROADCASTSSZ 2051
+VBROADCASTSSZrm 2052
+VBROADCASTSSZrmk 2053
+VBROADCASTSSZrmkz 2054
+VBROADCASTSSZrr 2055
+VBROADCASTSSZrrk 2056
+VBROADCASTSSZrrkz 2057
+VBROADCASTSSrm 2058
+VBROADCASTSSrr 2059
+VCMPBF 2060
+VCMPPDYrmi 2061
+VCMPPDYrri 2062
+VCMPPDZ 2063
+VCMPPDZrmbi 2064
+VCMPPDZrmbik 2065
+VCMPPDZrmi 2066
+VCMPPDZrmik 2067
+VCMPPDZrri 2068
+VCMPPDZrrib 2069
+VCMPPDZrribk 2070
+VCMPPDZrrik 2071
+VCMPPDrmi 2072
+VCMPPDrri 2073
+VCMPPHZ 2074
+VCMPPHZrmbi 2075
+VCMPPHZrmbik 2076
+VCMPPHZrmi 2077
+VCMPPHZrmik 2078
+VCMPPHZrri 2079
+VCMPPHZrrib 2080
+VCMPPHZrribk 2081
+VCMPPHZrrik 2082
+VCMPPSYrmi 2083
+VCMPPSYrri 2084
+VCMPPSZ 2085
+VCMPPSZrmbi 2086
+VCMPPSZrmbik 2087
+VCMPPSZrmi 2088
+VCMPPSZrmik 2089
+VCMPPSZrri 2090
+VCMPPSZrrib 2091
+VCMPPSZrribk 2092
+VCMPPSZrrik 2093
+VCMPPSrmi 2094
+VCMPPSrri 2095
+VCMPSDZrmi 2096
+VCMPSDZrmi_Int 2097
+VCMPSDZrmik_Int 2098
+VCMPSDZrri 2099
+VCMPSDZrri_Int 2100
+VCMPSDZrrib_Int 2101
+VCMPSDZrribk_Int 2102
+VCMPSDZrrik_Int 2103
+VCMPSDrmi 2104
+VCMPSDrmi_Int 2105
+VCMPSDrri 2106
+VCMPSDrri_Int 2107
+VCMPSHZrmi 2108
+VCMPSHZrmi_Int 2109
+VCMPSHZrmik_Int 2110
+VCMPSHZrri 2111
+VCMPSHZrri_Int 2112
+VCMPSHZrrib_Int 2113
+VCMPSHZrribk_Int 2114
+VCMPSHZrrik_Int 2115
+VCMPSSZrmi 2116
+VCMPSSZrmi_Int 2117
+VCMPSSZrmik_Int 2118
+VCMPSSZrri 2119
+VCMPSSZrri_Int 2120
+VCMPSSZrrib_Int 2121
+VCMPSSZrribk_Int 2122
+VCMPSSZrrik_Int 2123
+VCMPSSrmi 2124
+VCMPSSrmi_Int 2125
+VCMPSSrri 2126
+VCMPSSrri_Int 2127
+VCOMISBF 2128
+VCOMISDZrm 2129
+VCOMISDZrm_Int 2130
+VCOMISDZrr 2131
+VCOMISDZrr_Int 2132
+VCOMISDZrrb 2133
+VCOMISDrm 2134
+VCOMISDrm_Int 2135
+VCOMISDrr 2136
+VCOMISDrr_Int 2137
+VCOMISHZrm 2138
+VCOMISHZrm_Int 2139
+VCOMISHZrr 2140
+VCOMISHZrr_Int 2141
+VCOMISHZrrb 2142
+VCOMISSZrm 2143
+VCOMISSZrm_Int 2144
+VCOMISSZrr 2145
+VCOMISSZrr_Int 2146
+VCOMISSZrrb 2147
+VCOMISSrm 2148
+VCOMISSrm_Int 2149
+VCOMISSrr 2150
+VCOMISSrr_Int 2151
+VCOMPRESSPDZ 2152
+VCOMPRESSPDZmr 2153
+VCOMPRESSPDZmrk 2154
+VCOMPRESSPDZrr 2155
+VCOMPRESSPDZrrk 2156
+VCOMPRESSPDZrrkz 2157
+VCOMPRESSPSZ 2158
+VCOMPRESSPSZmr 2159
+VCOMPRESSPSZmrk 2160
+VCOMPRESSPSZrr 2161
+VCOMPRESSPSZrrk 2162
+VCOMPRESSPSZrrkz 2163
+VCOMXSDZrm_Int 2164
+VCOMXSDZrr_Int 2165
+VCOMXSDZrrb_Int 2166
+VCOMXSHZrm_Int 2167
+VCOMXSHZrr_Int 2168
+VCOMXSHZrrb_Int 2169
+VCOMXSSZrm_Int 2170
+VCOMXSSZrr_Int 2171
+VCOMXSSZrrb_Int 2172
+VCVT 2173
+VCVTBF 2174
+VCVTBIASPH 2175
+VCVTDQ 2176
+VCVTHF 2177
+VCVTNE 2178
+VCVTNEEBF 2179
+VCVTNEEPH 2180
+VCVTNEOBF 2181
+VCVTNEOPH 2182
+VCVTNEPS 2183
+VCVTPD 2184
+VCVTPH 2185
+VCVTPS 2186
+VCVTQQ 2187
+VCVTSD 2188
+VCVTSH 2189
+VCVTSI 2190
+VCVTSS 2191
+VCVTTBF 2192
+VCVTTPD 2193
+VCVTTPH 2194
+VCVTTPS 2195
+VCVTTSD 2196
+VCVTTSH 2197
+VCVTTSS 2198
+VCVTUDQ 2199
+VCVTUQQ 2200
+VCVTUSI 2201
+VCVTUW 2202
+VCVTW 2203
+VDBPSADBWZ 2204
+VDBPSADBWZrmi 2205
+VDBPSADBWZrmik 2206
+VDBPSADBWZrmikz 2207
+VDBPSADBWZrri 2208
+VDBPSADBWZrrik 2209
+VDBPSADBWZrrikz 2210
+VDIVBF 2211
+VDIVPDYrm 2212
+VDIVPDYrr 2213
+VDIVPDZ 2214
+VDIVPDZrm 2215
+VDIVPDZrmb 2216
+VDIVPDZrmbk 2217
+VDIVPDZrmbkz 2218
+VDIVPDZrmk 2219
+VDIVPDZrmkz 2220
+VDIVPDZrr 2221
+VDIVPDZrrb 2222
+VDIVPDZrrbk 2223
+VDIVPDZrrbkz 2224
+VDIVPDZrrk 2225
+VDIVPDZrrkz 2226
+VDIVPDrm 2227
+VDIVPDrr 2228
+VDIVPHZ 2229
+VDIVPHZrm 2230
+VDIVPHZrmb 2231
+VDIVPHZrmbk 2232
+VDIVPHZrmbkz 2233
+VDIVPHZrmk 2234
+VDIVPHZrmkz 2235
+VDIVPHZrr 2236
+VDIVPHZrrb 2237
+VDIVPHZrrbk 2238
+VDIVPHZrrbkz 2239
+VDIVPHZrrk 2240
+VDIVPHZrrkz 2241
+VDIVPSYrm 2242
+VDIVPSYrr 2243
+VDIVPSZ 2244
+VDIVPSZrm 2245
+VDIVPSZrmb 2246
+VDIVPSZrmbk 2247
+VDIVPSZrmbkz 2248
+VDIVPSZrmk 2249
+VDIVPSZrmkz 2250
+VDIVPSZrr 2251
+VDIVPSZrrb 2252
+VDIVPSZrrbk 2253
+VDIVPSZrrbkz 2254
+VDIVPSZrrk 2255
+VDIVPSZrrkz 2256
+VDIVPSrm 2257
+VDIVPSrr 2258
+VDIVSDZrm 2259
+VDIVSDZrm_Int 2260
+VDIVSDZrmk_Int 2261
+VDIVSDZrmkz_Int 2262
+VDIVSDZrr 2263
+VDIVSDZrr_Int 2264
+VDIVSDZrrb_Int 2265
+VDIVSDZrrbk_Int 2266
+VDIVSDZrrbkz_Int 2267
+VDIVSDZrrk_Int 2268
+VDIVSDZrrkz_Int 2269
+VDIVSDrm 2270
+VDIVSDrm_Int 2271
+VDIVSDrr 2272
+VDIVSDrr_Int 2273
+VDIVSHZrm 2274
+VDIVSHZrm_Int 2275
+VDIVSHZrmk_Int 2276
+VDIVSHZrmkz_Int 2277
+VDIVSHZrr 2278
+VDIVSHZrr_Int 2279
+VDIVSHZrrb_Int 2280
+VDIVSHZrrbk_Int 2281
+VDIVSHZrrbkz_Int 2282
+VDIVSHZrrk_Int 2283
+VDIVSHZrrkz_Int 2284
+VDIVSSZrm 2285
+VDIVSSZrm_Int 2286
+VDIVSSZrmk_Int 2287
+VDIVSSZrmkz_Int 2288
+VDIVSSZrr 2289
+VDIVSSZrr_Int 2290
+VDIVSSZrrb_Int 2291
+VDIVSSZrrbk_Int 2292
+VDIVSSZrrbkz_Int 2293
+VDIVSSZrrk_Int 2294
+VDIVSSZrrkz_Int 2295
+VDIVSSrm 2296
+VDIVSSrm_Int 2297
+VDIVSSrr 2298
+VDIVSSrr_Int 2299
+VDPBF 2300
+VDPPDrmi 2301
+VDPPDrri 2302
+VDPPHPSZ 2303
+VDPPHPSZm 2304
+VDPPHPSZmb 2305
+VDPPHPSZmbk 2306
+VDPPHPSZmbkz 2307
+VDPPHPSZmk 2308
+VDPPHPSZmkz 2309
+VDPPHPSZr 2310
+VDPPHPSZrk 2311
+VDPPHPSZrkz 2312
+VDPPSYrmi 2313
+VDPPSYrri 2314
+VDPPSrmi 2315
+VDPPSrri 2316
+VERRm 2317
+VERRr 2318
+VERWm 2319
+VERWr 2320
+VEXP 2321
+VEXPANDPDZ 2322
+VEXPANDPDZrm 2323
+VEXPANDPDZrmk 2324
+VEXPANDPDZrmkz 2325
+VEXPANDPDZrr 2326
+VEXPANDPDZrrk 2327
+VEXPANDPDZrrkz 2328
+VEXPANDPSZ 2329
+VEXPANDPSZrm 2330
+VEXPANDPSZrmk 2331
+VEXPANDPSZrmkz 2332
+VEXPANDPSZrr 2333
+VEXPANDPSZrrk 2334
+VEXPANDPSZrrkz 2335
+VEXTRACTF 2336
+VEXTRACTI 2337
+VEXTRACTPSZmri 2338
+VEXTRACTPSZrri 2339
+VEXTRACTPSmri 2340
+VEXTRACTPSrri 2341
+VFCMADDCPHZ 2342
+VFCMADDCPHZm 2343
+VFCMADDCPHZmb 2344
+VFCMADDCPHZmbk 2345
+VFCMADDCPHZmbkz 2346
+VFCMADDCPHZmk 2347
+VFCMADDCPHZmkz 2348
+VFCMADDCPHZr 2349
+VFCMADDCPHZrb 2350
+VFCMADDCPHZrbk 2351
+VFCMADDCPHZrbkz 2352
+VFCMADDCPHZrk 2353
+VFCMADDCPHZrkz 2354
+VFCMADDCSHZm 2355
+VFCMADDCSHZmk 2356
+VFCMADDCSHZmkz 2357
+VFCMADDCSHZr 2358
+VFCMADDCSHZrb 2359
+VFCMADDCSHZrbk 2360
+VFCMADDCSHZrbkz 2361
+VFCMADDCSHZrk 2362
+VFCMADDCSHZrkz 2363
+VFCMULCPHZ 2364
+VFCMULCPHZrm 2365
+VFCMULCPHZrmb 2366
+VFCMULCPHZrmbk 2367
+VFCMULCPHZrmbkz 2368
+VFCMULCPHZrmk 2369
+VFCMULCPHZrmkz 2370
+VFCMULCPHZrr 2371
+VFCMULCPHZrrb 2372
+VFCMULCPHZrrbk 2373
+VFCMULCPHZrrbkz 2374
+VFCMULCPHZrrk 2375
+VFCMULCPHZrrkz 2376
+VFCMULCSHZrm 2377
+VFCMULCSHZrmk 2378
+VFCMULCSHZrmkz 2379
+VFCMULCSHZrr 2380
+VFCMULCSHZrrb 2381
+VFCMULCSHZrrbk 2382
+VFCMULCSHZrrbkz 2383
+VFCMULCSHZrrk 2384
+VFCMULCSHZrrkz 2385
+VFIXUPIMMPDZ 2386
+VFIXUPIMMPDZrmbi 2387
+VFIXUPIMMPDZrmbik 2388
+VFIXUPIMMPDZrmbikz 2389
+VFIXUPIMMPDZrmi 2390
+VFIXUPIMMPDZrmik 2391
+VFIXUPIMMPDZrmikz 2392
+VFIXUPIMMPDZrri 2393
+VFIXUPIMMPDZrrib 2394
+VFIXUPIMMPDZrribk 2395
+VFIXUPIMMPDZrribkz 2396
+VFIXUPIMMPDZrrik 2397
+VFIXUPIMMPDZrrikz 2398
+VFIXUPIMMPSZ 2399
+VFIXUPIMMPSZrmbi 2400
+VFIXUPIMMPSZrmbik 2401
+VFIXUPIMMPSZrmbikz 2402
+VFIXUPIMMPSZrmi 2403
+VFIXUPIMMPSZrmik 2404
+VFIXUPIMMPSZrmikz 2405
+VFIXUPIMMPSZrri 2406
+VFIXUPIMMPSZrrib 2407
+VFIXUPIMMPSZrribk 2408
+VFIXUPIMMPSZrribkz 2409
+VFIXUPIMMPSZrrik 2410
+VFIXUPIMMPSZrrikz 2411
+VFIXUPIMMSDZrmi 2412
+VFIXUPIMMSDZrmik 2413
+VFIXUPIMMSDZrmikz 2414
+VFIXUPIMMSDZrri 2415
+VFIXUPIMMSDZrrib 2416
+VFIXUPIMMSDZrribk 2417
+VFIXUPIMMSDZrribkz 2418
+VFIXUPIMMSDZrrik 2419
+VFIXUPIMMSDZrrikz 2420
+VFIXUPIMMSSZrmi 2421
+VFIXUPIMMSSZrmik 2422
+VFIXUPIMMSSZrmikz 2423
+VFIXUPIMMSSZrri 2424
+VFIXUPIMMSSZrrib 2425
+VFIXUPIMMSSZrribk 2426
+VFIXUPIMMSSZrribkz 2427
+VFIXUPIMMSSZrrik 2428
+VFIXUPIMMSSZrrikz 2429
+VFMADD 2430
+VFMADDCPHZ 2431
+VFMADDCPHZm 2432
+VFMADDCPHZmb 2433
+VFMADDCPHZmbk 2434
+VFMADDCPHZmbkz 2435
+VFMADDCPHZmk 2436
+VFMADDCPHZmkz 2437
+VFMADDCPHZr 2438
+VFMADDCPHZrb 2439
+VFMADDCPHZrbk 2440
+VFMADDCPHZrbkz 2441
+VFMADDCPHZrk 2442
+VFMADDCPHZrkz 2443
+VFMADDCSHZm 2444
+VFMADDCSHZmk 2445
+VFMADDCSHZmkz 2446
+VFMADDCSHZr 2447
+VFMADDCSHZrb 2448
+VFMADDCSHZrbk 2449
+VFMADDCSHZrbkz 2450
+VFMADDCSHZrk 2451
+VFMADDCSHZrkz 2452
+VFMADDPD 2453
+VFMADDPS 2454
+VFMADDSD 2455
+VFMADDSS 2456
+VFMADDSUB 2457
+VFMADDSUBPD 2458
+VFMADDSUBPS 2459
+VFMSUB 2460
+VFMSUBADD 2461
+VFMSUBADDPD 2462
+VFMSUBADDPS 2463
+VFMSUBPD 2464
+VFMSUBPS 2465
+VFMSUBSD 2466
+VFMSUBSS 2467
+VFMULCPHZ 2468
+VFMULCPHZrm 2469
+VFMULCPHZrmb 2470
+VFMULCPHZrmbk 2471
+VFMULCPHZrmbkz 2472
+VFMULCPHZrmk 2473
+VFMULCPHZrmkz 2474
+VFMULCPHZrr 2475
+VFMULCPHZrrb 2476
+VFMULCPHZrrbk 2477
+VFMULCPHZrrbkz 2478
+VFMULCPHZrrk 2479
+VFMULCPHZrrkz 2480
+VFMULCSHZrm 2481
+VFMULCSHZrmk 2482
+VFMULCSHZrmkz 2483
+VFMULCSHZrr 2484
+VFMULCSHZrrb 2485
+VFMULCSHZrrbk 2486
+VFMULCSHZrrbkz 2487
+VFMULCSHZrrk 2488
+VFMULCSHZrrkz 2489
+VFNMADD 2490
+VFNMADDPD 2491
+VFNMADDPS 2492
+VFNMADDSD 2493
+VFNMADDSS 2494
+VFNMSUB 2495
+VFNMSUBPD 2496
+VFNMSUBPS 2497
+VFNMSUBSD 2498
+VFNMSUBSS 2499
+VFPCLASSBF 2500
+VFPCLASSPDZ 2501
+VFPCLASSPDZmbi 2502
+VFPCLASSPDZmbik 2503
+VFPCLASSPDZmi 2504
+VFPCLASSPDZmik 2505
+VFPCLASSPDZri 2506
+VFPCLASSPDZrik 2507
+VFPCLASSPHZ 2508
+VFPCLASSPHZmbi 2509
+VFPCLASSPHZmbik 2510
+VFPCLASSPHZmi 2511
+VFPCLASSPHZmik 2512
+VFPCLASSPHZri 2513
+VFPCLASSPHZrik 2514
+VFPCLASSPSZ 2515
+VFPCLASSPSZmbi 2516
+VFPCLASSPSZmbik 2517
+VFPCLASSPSZmi 2518
+VFPCLASSPSZmik 2519
+VFPCLASSPSZri 2520
+VFPCLASSPSZrik 2521
+VFPCLASSSDZmi 2522
+VFPCLASSSDZmik 2523
+VFPCLASSSDZri 2524
+VFPCLASSSDZrik 2525
+VFPCLASSSHZmi 2526
+VFPCLASSSHZmik 2527
+VFPCLASSSHZri 2528
+VFPCLASSSHZrik 2529
+VFPCLASSSSZmi 2530
+VFPCLASSSSZmik 2531
+VFPCLASSSSZri 2532
+VFPCLASSSSZrik 2533
+VFRCZPDYrm 2534
+VFRCZPDYrr 2535
+VFRCZPDrm 2536
+VFRCZPDrr 2537
+VFRCZPSYrm 2538
+VFRCZPSYrr 2539
+VFRCZPSrm 2540
+VFRCZPSrr 2541
+VFRCZSDrm 2542
+VFRCZSDrr 2543
+VFRCZSSrm 2544
+VFRCZSSrr 2545
+VGATHERDPDYrm 2546
+VGATHERDPDZ 2547
+VGATHERDPDZrm 2548
+VGATHERDPDrm 2549
+VGATHERDPSYrm 2550
+VGATHERDPSZ 2551
+VGATHERDPSZrm 2552
+VGATHERDPSrm 2553
+VGATHERPF 2554
+VGATHERQPDYrm 2555
+VGATHERQPDZ 2556
+VGATHERQPDZrm 2557
+VGATHERQPDrm 2558
+VGATHERQPSYrm 2559
+VGATHERQPSZ 2560
+VGATHERQPSZrm 2561
+VGATHERQPSrm 2562
+VGETEXPBF 2563
+VGETEXPPDZ 2564
+VGETEXPPDZm 2565
+VGETEXPPDZmb 2566
+VGETEXPPDZmbk 2567
+VGETEXPPDZmbkz 2568
+VGETEXPPDZmk 2569
+VGETEXPPDZmkz 2570
+VGETEXPPDZr 2571
+VGETEXPPDZrb 2572
+VGETEXPPDZrbk 2573
+VGETEXPPDZrbkz 2574
+VGETEXPPDZrk 2575
+VGETEXPPDZrkz 2576
+VGETEXPPHZ 2577
+VGETEXPPHZm 2578
+VGETEXPPHZmb 2579
+VGETEXPPHZmbk 2580
+VGETEXPPHZmbkz 2581
+VGETEXPPHZmk 2582
+VGETEXPPHZmkz 2583
+VGETEXPPHZr 2584
+VGETEXPPHZrb 2585
+VGETEXPPHZrbk 2586
+VGETEXPPHZrbkz 2587
+VGETEXPPHZrk 2588
+VGETEXPPHZrkz 2589
+VGETEXPPSZ 2590
+VGETEXPPSZm 2591
+VGETEXPPSZmb 2592
+VGETEXPPSZmbk 2593
+VGETEXPPSZmbkz 2594
+VGETEXPPSZmk 2595
+VGETEXPPSZmkz 2596
+VGETEXPPSZr 2597
+VGETEXPPSZrb 2598
+VGETEXPPSZrbk 2599
+VGETEXPPSZrbkz 2600
+VGETEXPPSZrk 2601
+VGETEXPPSZrkz 2602
+VGETEXPSDZm 2603
+VGETEXPSDZmk 2604
+VGETEXPSDZmkz 2605
+VGETEXPSDZr 2606
+VGETEXPSDZrb 2607
+VGETEXPSDZrbk 2608
+VGETEXPSDZrbkz 2609
+VGETEXPSDZrk 2610
+VGETEXPSDZrkz 2611
+VGETEXPSHZm 2612
+VGETEXPSHZmk 2613
+VGETEXPSHZmkz 2614
+VGETEXPSHZr 2615
+VGETEXPSHZrb 2616
+VGETEXPSHZrbk 2617
+VGETEXPSHZrbkz 2618
+VGETEXPSHZrk 2619
+VGETEXPSHZrkz 2620
+VGETEXPSSZm 2621
+VGETEXPSSZmk 2622
+VGETEXPSSZmkz 2623
+VGETEXPSSZr 2624
+VGETEXPSSZrb 2625
+VGETEXPSSZrbk 2626
+VGETEXPSSZrbkz 2627
+VGETEXPSSZrk 2628
+VGETEXPSSZrkz 2629
+VGETMANTBF 2630
+VGETMANTPDZ 2631
+VGETMANTPDZrmbi 2632
+VGETMANTPDZrmbik 2633
+VGETMANTPDZrmbikz 2634
+VGETMANTPDZrmi 2635
+VGETMANTPDZrmik 2636
+VGETMANTPDZrmikz 2637
+VGETMANTPDZrri 2638
+VGETMANTPDZrrib 2639
+VGETMANTPDZrribk 2640
+VGETMANTPDZrribkz 2641
+VGETMANTPDZrrik 2642
+VGETMANTPDZrrikz 2643
+VGETMANTPHZ 2644
+VGETMANTPHZrmbi 2645
+VGETMANTPHZrmbik 2646
+VGETMANTPHZrmbikz 2647
+VGETMANTPHZrmi 2648
+VGETMANTPHZrmik 2649
+VGETMANTPHZrmikz 2650
+VGETMANTPHZrri 2651
+VGETMANTPHZrrib 2652
+VGETMANTPHZrribk 2653
+VGETMANTPHZrribkz 2654
+VGETMANTPHZrrik 2655
+VGETMANTPHZrrikz 2656
+VGETMANTPSZ 2657
+VGETMANTPSZrmbi 2658
+VGETMANTPSZrmbik 2659
+VGETMANTPSZrmbikz 2660
+VGETMANTPSZrmi 2661
+VGETMANTPSZrmik 2662
+VGETMANTPSZrmikz 2663
+VGETMANTPSZrri 2664
+VGETMANTPSZrrib 2665
+VGETMANTPSZrribk 2666
+VGETMANTPSZrribkz 2667
+VGETMANTPSZrrik 2668
+VGETMANTPSZrrikz 2669
+VGETMANTSDZrmi 2670
+VGETMANTSDZrmik 2671
+VGETMANTSDZrmikz 2672
+VGETMANTSDZrri 2673
+VGETMANTSDZrrib 2674
+VGETMANTSDZrribk 2675
+VGETMANTSDZrribkz 2676
+VGETMANTSDZrrik 2677
+VGETMANTSDZrrikz 2678
+VGETMANTSHZrmi 2679
+VGETMANTSHZrmik 2680
+VGETMANTSHZrmikz 2681
+VGETMANTSHZrri 2682
+VGETMANTSHZrrib 2683
+VGETMANTSHZrribk 2684
+VGETMANTSHZrribkz 2685
+VGETMANTSHZrrik 2686
+VGETMANTSHZrrikz 2687
+VGETMANTSSZrmi 2688
+VGETMANTSSZrmik 2689
+VGETMANTSSZrmikz 2690
+VGETMANTSSZrri 2691
+VGETMANTSSZrrib 2692
+VGETMANTSSZrribk 2693
+VGETMANTSSZrribkz 2694
+VGETMANTSSZrrik 2695
+VGETMANTSSZrrikz 2696
+VGF 2697
+VHADDPDYrm 2698
+VHADDPDYrr 2699
+VHADDPDrm 2700
+VHADDPDrr 2701
+VHADDPSYrm 2702
+VHADDPSYrr 2703
+VHADDPSrm 2704
+VHADDPSrr 2705
+VHSUBPDYrm 2706
+VHSUBPDYrr 2707
+VHSUBPDrm 2708
+VHSUBPDrr 2709
+VHSUBPSYrm 2710
+VHSUBPSYrr 2711
+VHSUBPSrm 2712
+VHSUBPSrr 2713
+VINSERTF 2714
+VINSERTI 2715
+VINSERTPSZrmi 2716
+VINSERTPSZrri 2717
+VINSERTPSrmi 2718
+VINSERTPSrri 2719
+VLDDQUYrm 2720
+VLDDQUrm 2721
+VLDMXCSR 2722
+VMASKMOVDQU 2723
+VMASKMOVPDYmr 2724
+VMASKMOVPDYrm 2725
+VMASKMOVPDmr 2726
+VMASKMOVPDrm 2727
+VMASKMOVPSYmr 2728
+VMASKMOVPSYrm 2729
+VMASKMOVPSmr 2730
+VMASKMOVPSrm 2731
+VMAXBF 2732
+VMAXCPDYrm 2733
+VMAXCPDYrr 2734
+VMAXCPDZ 2735
+VMAXCPDZrm 2736
+VMAXCPDZrmb 2737
+VMAXCPDZrmbk 2738
+VMAXCPDZrmbkz 2739
+VMAXCPDZrmk 2740
+VMAXCPDZrmkz 2741
+VMAXCPDZrr 2742
+VMAXCPDZrrk 2743
+VMAXCPDZrrkz 2744
+VMAXCPDrm 2745
+VMAXCPDrr 2746
+VMAXCPHZ 2747
+VMAXCPHZrm 2748
+VMAXCPHZrmb 2749
+VMAXCPHZrmbk 2750
+VMAXCPHZrmbkz 2751
+VMAXCPHZrmk 2752
+VMAXCPHZrmkz 2753
+VMAXCPHZrr 2754
+VMAXCPHZrrk 2755
+VMAXCPHZrrkz 2756
+VMAXCPSYrm 2757
+VMAXCPSYrr 2758
+VMAXCPSZ 2759
+VMAXCPSZrm 2760
+VMAXCPSZrmb 2761
+VMAXCPSZrmbk 2762
+VMAXCPSZrmbkz 2763
+VMAXCPSZrmk 2764
+VMAXCPSZrmkz 2765
+VMAXCPSZrr 2766
+VMAXCPSZrrk 2767
+VMAXCPSZrrkz 2768
+VMAXCPSrm 2769
+VMAXCPSrr 2770
+VMAXCSDZrm 2771
+VMAXCSDZrr 2772
+VMAXCSDrm 2773
+VMAXCSDrr 2774
+VMAXCSHZrm 2775
+VMAXCSHZrr 2776
+VMAXCSSZrm 2777
+VMAXCSSZrr 2778
+VMAXCSSrm 2779
+VMAXCSSrr 2780
+VMAXPDYrm 2781
+VMAXPDYrr 2782
+VMAXPDZ 2783
+VMAXPDZrm 2784
+VMAXPDZrmb 2785
+VMAXPDZrmbk 2786
+VMAXPDZrmbkz 2787
+VMAXPDZrmk 2788
+VMAXPDZrmkz 2789
+VMAXPDZrr 2790
+VMAXPDZrrb 2791
+VMAXPDZrrbk 2792
+VMAXPDZrrbkz 2793
+VMAXPDZrrk 2794
+VMAXPDZrrkz 2795
+VMAXPDrm 2796
+VMAXPDrr 2797
+VMAXPHZ 2798
+VMAXPHZrm 2799
+VMAXPHZrmb 2800
+VMAXPHZrmbk 2801
+VMAXPHZrmbkz 2802
+VMAXPHZrmk 2803
+VMAXPHZrmkz 2804
+VMAXPHZrr 2805
+VMAXPHZrrb 2806
+VMAXPHZrrbk 2807
+VMAXPHZrrbkz 2808
+VMAXPHZrrk 2809
+VMAXPHZrrkz 2810
+VMAXPSYrm 2811
+VMAXPSYrr 2812
+VMAXPSZ 2813
+VMAXPSZrm 2814
+VMAXPSZrmb 2815
+VMAXPSZrmbk 2816
+VMAXPSZrmbkz 2817
+VMAXPSZrmk 2818
+VMAXPSZrmkz 2819
+VMAXPSZrr 2820
+VMAXPSZrrb 2821
+VMAXPSZrrbk 2822
+VMAXPSZrrbkz 2823
+VMAXPSZrrk 2824
+VMAXPSZrrkz 2825
+VMAXPSrm 2826
+VMAXPSrr 2827
+VMAXSDZrm 2828
+VMAXSDZrm_Int 2829
+VMAXSDZrmk_Int 2830
+VMAXSDZrmkz_Int 2831
+VMAXSDZrr 2832
+VMAXSDZrr_Int 2833
+VMAXSDZrrb_Int 2834
+VMAXSDZrrbk_Int 2835
+VMAXSDZrrbkz_Int 2836
+VMAXSDZrrk_Int 2837
+VMAXSDZrrkz_Int 2838
+VMAXSDrm 2839
+VMAXSDrm_Int 2840
+VMAXSDrr 2841
+VMAXSDrr_Int 2842
+VMAXSHZrm 2843
+VMAXSHZrm_Int 2844
+VMAXSHZrmk_Int 2845
+VMAXSHZrmkz_Int 2846
+VMAXSHZrr 2847
+VMAXSHZrr_Int 2848
+VMAXSHZrrb_Int 2849
+VMAXSHZrrbk_Int 2850
+VMAXSHZrrbkz_Int 2851
+VMAXSHZrrk_Int 2852
+VMAXSHZrrkz_Int 2853
+VMAXSSZrm 2854
+VMAXSSZrm_Int 2855
+VMAXSSZrmk_Int 2856
+VMAXSSZrmkz_Int 2857
+VMAXSSZrr 2858
+VMAXSSZrr_Int 2859
+VMAXSSZrrb_Int 2860
+VMAXSSZrrbk_Int 2861
+VMAXSSZrrbkz_Int 2862
+VMAXSSZrrk_Int 2863
+VMAXSSZrrkz_Int 2864
+VMAXSSrm 2865
+VMAXSSrm_Int 2866
+VMAXSSrr 2867
+VMAXSSrr_Int 2868
+VMCALL 2869
+VMCLEARm 2870
+VMFUNC 2871
+VMINBF 2872
+VMINCPDYrm 2873
+VMINCPDYrr 2874
+VMINCPDZ 2875
+VMINCPDZrm 2876
+VMINCPDZrmb 2877
+VMINCPDZrmbk 2878
+VMINCPDZrmbkz 2879
+VMINCPDZrmk 2880
+VMINCPDZrmkz 2881
+VMINCPDZrr 2882
+VMINCPDZrrk 2883
+VMINCPDZrrkz 2884
+VMINCPDrm 2885
+VMINCPDrr 2886
+VMINCPHZ 2887
+VMINCPHZrm 2888
+VMINCPHZrmb 2889
+VMINCPHZrmbk 2890
+VMINCPHZrmbkz 2891
+VMINCPHZrmk 2892
+VMINCPHZrmkz 2893
+VMINCPHZrr 2894
+VMINCPHZrrk 2895
+VMINCPHZrrkz 2896
+VMINCPSYrm 2897
+VMINCPSYrr 2898
+VMINCPSZ 2899
+VMINCPSZrm 2900
+VMINCPSZrmb 2901
+VMINCPSZrmbk 2902
+VMINCPSZrmbkz 2903
+VMINCPSZrmk 2904
+VMINCPSZrmkz 2905
+VMINCPSZrr 2906
+VMINCPSZrrk 2907
+VMINCPSZrrkz 2908
+VMINCPSrm 2909
+VMINCPSrr 2910
+VMINCSDZrm 2911
+VMINCSDZrr 2912
+VMINCSDrm 2913
+VMINCSDrr 2914
+VMINCSHZrm 2915
+VMINCSHZrr 2916
+VMINCSSZrm 2917
+VMINCSSZrr 2918
+VMINCSSrm 2919
+VMINCSSrr 2920
+VMINMAXBF 2921
+VMINMAXPDZ 2922
+VMINMAXPDZrmbi 2923
+VMINMAXPDZrmbik 2924
+VMINMAXPDZrmbikz 2925
+VMINMAXPDZrmi 2926
+VMINMAXPDZrmik 2927
+VMINMAXPDZrmikz 2928
+VMINMAXPDZrri 2929
+VMINMAXPDZrrib 2930
+VMINMAXPDZrribk 2931
+VMINMAXPDZrribkz 2932
+VMINMAXPDZrrik 2933
+VMINMAXPDZrrikz 2934
+VMINMAXPHZ 2935
+VMINMAXPHZrmbi 2936
+VMINMAXPHZrmbik 2937
+VMINMAXPHZrmbikz 2938
+VMINMAXPHZrmi 2939
+VMINMAXPHZrmik 2940
+VMINMAXPHZrmikz 2941
+VMINMAXPHZrri 2942
+VMINMAXPHZrrib 2943
+VMINMAXPHZrribk 2944
+VMINMAXPHZrribkz 2945
+VMINMAXPHZrrik 2946
+VMINMAXPHZrrikz 2947
+VMINMAXPSZ 2948
+VMINMAXPSZrmbi 2949
+VMINMAXPSZrmbik 2950
+VMINMAXPSZrmbikz 2951
+VMINMAXPSZrmi 2952
+VMINMAXPSZrmik 2953
+VMINMAXPSZrmikz 2954
+VMINMAXPSZrri 2955
+VMINMAXPSZrrib 2956
+VMINMAXPSZrribk 2957
+VMINMAXPSZrribkz 2958
+VMINMAXPSZrrik 2959
+VMINMAXPSZrrikz 2960
+VMINMAXSDrmi 2961
+VMINMAXSDrmi_Int 2962
+VMINMAXSDrmik_Int 2963
+VMINMAXSDrmikz_Int 2964
+VMINMAXSDrri 2965
+VMINMAXSDrri_Int 2966
+VMINMAXSDrrib_Int 2967
+VMINMAXSDrribk_Int 2968
+VMINMAXSDrribkz_Int 2969
+VMINMAXSDrrik_Int 2970
+VMINMAXSDrrikz_Int 2971
+VMINMAXSHrmi 2972
+VMINMAXSHrmi_Int 2973
+VMINMAXSHrmik_Int 2974
+VMINMAXSHrmikz_Int 2975
+VMINMAXSHrri 2976
+VMINMAXSHrri_Int 2977
+VMINMAXSHrrib_Int 2978
+VMINMAXSHrribk_Int 2979
+VMINMAXSHrribkz_Int 2980
+VMINMAXSHrrik_Int 2981
+VMINMAXSHrrikz_Int 2982
+VMINMAXSSrmi 2983
+VMINMAXSSrmi_Int 2984
+VMINMAXSSrmik_Int 2985
+VMINMAXSSrmikz_Int 2986
+VMINMAXSSrri 2987
+VMINMAXSSrri_Int 2988
+VMINMAXSSrrib_Int 2989
+VMINMAXSSrribk_Int 2990
+VMINMAXSSrribkz_Int 2991
+VMINMAXSSrrik_Int 2992
+VMINMAXSSrrikz_Int 2993
+VMINPDYrm 2994
+VMINPDYrr 2995
+VMINPDZ 2996
+VMINPDZrm 2997
+VMINPDZrmb 2998
+VMINPDZrmbk 2999
+VMINPDZrmbkz 3000
+VMINPDZrmk 3001
+VMINPDZrmkz 3002
+VMINPDZrr 3003
+VMINPDZrrb 3004
+VMINPDZrrbk 3005
+VMINPDZrrbkz 3006
+VMINPDZrrk 3007
+VMINPDZrrkz 3008
+VMINPDrm 3009
+VMINPDrr 3010
+VMINPHZ 3011
+VMINPHZrm 3012
+VMINPHZrmb 3013
+VMINPHZrmbk 3014
+VMINPHZrmbkz 3015
+VMINPHZrmk 3016
+VMINPHZrmkz 3017
+VMINPHZrr 3018
+VMINPHZrrb 3019
+VMINPHZrrbk 3020
+VMINPHZrrbkz 3021
+VMINPHZrrk 3022
+VMINPHZrrkz 3023
+VMINPSYrm 3024
+VMINPSYrr 3025
+VMINPSZ 3026
+VMINPSZrm 3027
+VMINPSZrmb 3028
+VMINPSZrmbk 3029
+VMINPSZrmbkz 3030
+VMINPSZrmk 3031
+VMINPSZrmkz 3032
+VMINPSZrr 3033
+VMINPSZrrb 3034
+VMINPSZrrbk 3035
+VMINPSZrrbkz 3036
+VMINPSZrrk 3037
+VMINPSZrrkz 3038
+VMINPSrm 3039
+VMINPSrr 3040
+VMINSDZrm 3041
+VMINSDZrm_Int 3042
+VMINSDZrmk_Int 3043
+VMINSDZrmkz_Int 3044
+VMINSDZrr 3045
+VMINSDZrr_Int 3046
+VMINSDZrrb_Int 3047
+VMINSDZrrbk_Int 3048
+VMINSDZrrbkz_Int 3049
+VMINSDZrrk_Int 3050
+VMINSDZrrkz_Int 3051
+VMINSDrm 3052
+VMINSDrm_Int 3053
+VMINSDrr 3054
+VMINSDrr_Int 3055
+VMINSHZrm 3056
+VMINSHZrm_Int 3057
+VMINSHZrmk_Int 3058
+VMINSHZrmkz_Int 3059
+VMINSHZrr 3060
+VMINSHZrr_Int 3061
+VMINSHZrrb_Int 3062
+VMINSHZrrbk_Int 3063
+VMINSHZrrbkz_Int 3064
+VMINSHZrrk_Int 3065
+VMINSHZrrkz_Int 3066
+VMINSSZrm 3067
+VMINSSZrm_Int 3068
+VMINSSZrmk_Int 3069
+VMINSSZrmkz_Int 3070
+VMINSSZrr 3071
+VMINSSZrr_Int 3072
+VMINSSZrrb_Int 3073
+VMINSSZrrbk_Int 3074
+VMINSSZrrbkz_Int 3075
+VMINSSZrrk_Int 3076
+VMINSSZrrkz_Int 3077
+VMINSSrm 3078
+VMINSSrm_Int 3079
+VMINSSrr 3080
+VMINSSrr_Int 3081
+VMLAUNCH 3082
+VMLOAD 3083
+VMMCALL 3084
+VMOV 3085
+VMOVAPDYmr 3086
+VMOVAPDYrm 3087
+VMOVAPDYrr 3088
+VMOVAPDYrr_REV 3089
+VMOVAPDZ 3090
+VMOVAPDZmr 3091
+VMOVAPDZmrk 3092
+VMOVAPDZrm 3093
+VMOVAPDZrmk 3094
+VMOVAPDZrmkz 3095
+VMOVAPDZrr 3096
+VMOVAPDZrr_REV 3097
+VMOVAPDZrrk 3098
+VMOVAPDZrrk_REV 3099
+VMOVAPDZrrkz 3100
+VMOVAPDZrrkz_REV 3101
+VMOVAPDmr 3102
+VMOVAPDrm 3103
+VMOVAPDrr 3104
+VMOVAPDrr_REV 3105
+VMOVAPSYmr 3106
+VMOVAPSYrm 3107
+VMOVAPSYrr 3108
+VMOVAPSYrr_REV 3109
+VMOVAPSZ 3110
+VMOVAPSZmr 3111
+VMOVAPSZmrk 3112
+VMOVAPSZrm 3113
+VMOVAPSZrmk 3114
+VMOVAPSZrmkz 3115
+VMOVAPSZrr 3116
+VMOVAPSZrr_REV 3117
+VMOVAPSZrrk 3118
+VMOVAPSZrrk_REV 3119
+VMOVAPSZrrkz 3120
+VMOVAPSZrrkz_REV 3121
+VMOVAPSmr 3122
+VMOVAPSrm 3123
+VMOVAPSrr 3124
+VMOVAPSrr_REV 3125
+VMOVDDUPYrm 3126
+VMOVDDUPYrr 3127
+VMOVDDUPZ 3128
+VMOVDDUPZrm 3129
+VMOVDDUPZrmk 3130
+VMOVDDUPZrmkz 3131
+VMOVDDUPZrr 3132
+VMOVDDUPZrrk 3133
+VMOVDDUPZrrkz 3134
+VMOVDDUPrm 3135
+VMOVDDUPrr 3136
+VMOVDI 3137
+VMOVDQA 3138
+VMOVDQAYmr 3139
+VMOVDQAYrm 3140
+VMOVDQAYrr 3141
+VMOVDQAYrr_REV 3142
+VMOVDQAmr 3143
+VMOVDQArm 3144
+VMOVDQArr 3145
+VMOVDQArr_REV 3146
+VMOVDQU 3147
+VMOVDQUYmr 3148
+VMOVDQUYrm 3149
+VMOVDQUYrr 3150
+VMOVDQUYrr_REV 3151
+VMOVDQUmr 3152
+VMOVDQUrm 3153
+VMOVDQUrr 3154
+VMOVDQUrr_REV 3155
+VMOVHLPSZrr 3156
+VMOVHLPSrr 3157
+VMOVHPDZ 3158
+VMOVHPDmr 3159
+VMOVHPDrm 3160
+VMOVHPSZ 3161
+VMOVHPSmr 3162
+VMOVHPSrm 3163
+VMOVLHPSZrr 3164
+VMOVLHPSrr 3165
+VMOVLPDZ 3166
+VMOVLPDmr 3167
+VMOVLPDrm 3168
+VMOVLPSZ 3169
+VMOVLPSmr 3170
+VMOVLPSrm 3171
+VMOVMSKPDYrr 3172
+VMOVMSKPDrr 3173
+VMOVMSKPSYrr 3174
+VMOVMSKPSrr 3175
+VMOVNTDQAYrm 3176
+VMOVNTDQAZ 3177
+VMOVNTDQAZrm 3178
+VMOVNTDQArm 3179
+VMOVNTDQYmr 3180
+VMOVNTDQZ 3181
+VMOVNTDQZmr 3182
+VMOVNTDQmr 3183
+VMOVNTPDYmr 3184
+VMOVNTPDZ 3185
+VMOVNTPDZmr 3186
+VMOVNTPDmr 3187
+VMOVNTPSYmr 3188
+VMOVNTPSZ 3189
+VMOVNTPSZmr 3190
+VMOVNTPSmr 3191
+VMOVPDI 3192
+VMOVPQI 3193
+VMOVPQIto 3194
+VMOVQI 3195
+VMOVRSBZ 3196
+VMOVRSBZm 3197
+VMOVRSBZmk 3198
+VMOVRSBZmkz 3199
+VMOVRSDZ 3200
+VMOVRSDZm 3201
+VMOVRSDZmk 3202
+VMOVRSDZmkz 3203
+VMOVRSQZ 3204
+VMOVRSQZm 3205
+VMOVRSQZmk 3206
+VMOVRSQZmkz 3207
+VMOVRSWZ 3208
+VMOVRSWZm 3209
+VMOVRSWZmk 3210
+VMOVRSWZmkz 3211
+VMOVSDZmr 3212
+VMOVSDZmrk 3213
+VMOVSDZrm 3214
+VMOVSDZrm_alt 3215
+VMOVSDZrmk 3216
+VMOVSDZrmkz 3217
+VMOVSDZrr 3218
+VMOVSDZrr_REV 3219
+VMOVSDZrrk 3220
+VMOVSDZrrk_REV 3221
+VMOVSDZrrkz 3222
+VMOVSDZrrkz_REV 3223
+VMOVSDmr 3224
+VMOVSDrm 3225
+VMOVSDrm_alt 3226
+VMOVSDrr 3227
+VMOVSDrr_REV 3228
+VMOVSDto 3229
+VMOVSH 3230
+VMOVSHDUPYrm 3231
+VMOVSHDUPYrr 3232
+VMOVSHDUPZ 3233
+VMOVSHDUPZrm 3234
+VMOVSHDUPZrmk 3235
+VMOVSHDUPZrmkz 3236
+VMOVSHDUPZrr 3237
+VMOVSHDUPZrrk 3238
+VMOVSHDUPZrrkz 3239
+VMOVSHDUPrm 3240
+VMOVSHDUPrr 3241
+VMOVSHZmr 3242
+VMOVSHZmrk 3243
+VMOVSHZrm 3244
+VMOVSHZrm_alt 3245
+VMOVSHZrmk 3246
+VMOVSHZrmkz 3247
+VMOVSHZrr 3248
+VMOVSHZrr_REV 3249
+VMOVSHZrrk 3250
+VMOVSHZrrk_REV 3251
+VMOVSHZrrkz 3252
+VMOVSHZrrkz_REV 3253
+VMOVSHtoW 3254
+VMOVSLDUPYrm 3255
+VMOVSLDUPYrr 3256
+VMOVSLDUPZ 3257
+VMOVSLDUPZrm 3258
+VMOVSLDUPZrmk 3259
+VMOVSLDUPZrmkz 3260
+VMOVSLDUPZrr 3261
+VMOVSLDUPZrrk 3262
+VMOVSLDUPZrrkz 3263
+VMOVSLDUPrm 3264
+VMOVSLDUPrr 3265
+VMOVSS 3266
+VMOVSSZmr 3267
+VMOVSSZmrk 3268
+VMOVSSZrm 3269
+VMOVSSZrm_alt 3270
+VMOVSSZrmk 3271
+VMOVSSZrmkz 3272
+VMOVSSZrr 3273
+VMOVSSZrr_REV 3274
+VMOVSSZrrk 3275
+VMOVSSZrrk_REV 3276
+VMOVSSZrrkz 3277
+VMOVSSZrrkz_REV 3278
+VMOVSSmr 3279
+VMOVSSrm 3280
+VMOVSSrm_alt 3281
+VMOVSSrr 3282
+VMOVSSrr_REV 3283
+VMOVUPDYmr 3284
+VMOVUPDYrm 3285
+VMOVUPDYrr 3286
+VMOVUPDYrr_REV 3287
+VMOVUPDZ 3288
+VMOVUPDZmr 3289
+VMOVUPDZmrk 3290
+VMOVUPDZrm 3291
+VMOVUPDZrmk 3292
+VMOVUPDZrmkz 3293
+VMOVUPDZrr 3294
+VMOVUPDZrr_REV 3295
+VMOVUPDZrrk 3296
+VMOVUPDZrrk_REV 3297
+VMOVUPDZrrkz 3298
+VMOVUPDZrrkz_REV 3299
+VMOVUPDmr 3300
+VMOVUPDrm 3301
+VMOVUPDrr 3302
+VMOVUPDrr_REV 3303
+VMOVUPSYmr 3304
+VMOVUPSYrm 3305
+VMOVUPSYrr 3306
+VMOVUPSYrr_REV 3307
+VMOVUPSZ 3308
+VMOVUPSZmr 3309
+VMOVUPSZmrk 3310
+VMOVUPSZrm 3311
+VMOVUPSZrmk 3312
+VMOVUPSZrmkz 3313
+VMOVUPSZrr 3314
+VMOVUPSZrr_REV 3315
+VMOVUPSZrrk 3316
+VMOVUPSZrrk_REV 3317
+VMOVUPSZrrkz 3318
+VMOVUPSZrrkz_REV 3319
+VMOVUPSmr 3320
+VMOVUPSrm 3321
+VMOVUPSrr 3322
+VMOVUPSrr_REV 3323
+VMOVW 3324
+VMOVWmr 3325
+VMOVWrm 3326
+VMOVZPDILo 3327
+VMOVZPQILo 3328
+VMOVZPWILo 3329
+VMPSADBWYrmi 3330
+VMPSADBWYrri 3331
+VMPSADBWZ 3332
+VMPSADBWZrmi 3333
+VMPSADBWZrmik 3334
+VMPSADBWZrmikz 3335
+VMPSADBWZrri 3336
+VMPSADBWZrrik 3337
+VMPSADBWZrrikz 3338
+VMPSADBWrmi 3339
+VMPSADBWrri 3340
+VMPTRLDm 3341
+VMPTRSTm 3342
+VMREAD 3343
+VMRESUME 3344
+VMRUN 3345
+VMSAVE 3346
+VMULBF 3347
+VMULPDYrm 3348
+VMULPDYrr 3349
+VMULPDZ 3350
+VMULPDZrm 3351
+VMULPDZrmb 3352
+VMULPDZrmbk 3353
+VMULPDZrmbkz 3354
+VMULPDZrmk 3355
+VMULPDZrmkz 3356
+VMULPDZrr 3357
+VMULPDZrrb 3358
+VMULPDZrrbk 3359
+VMULPDZrrbkz 3360
+VMULPDZrrk 3361
+VMULPDZrrkz 3362
+VMULPDrm 3363
+VMULPDrr 3364
+VMULPHZ 3365
+VMULPHZrm 3366
+VMULPHZrmb 3367
+VMULPHZrmbk 3368
+VMULPHZrmbkz 3369
+VMULPHZrmk 3370
+VMULPHZrmkz 3371
+VMULPHZrr 3372
+VMULPHZrrb 3373
+VMULPHZrrbk 3374
+VMULPHZrrbkz 3375
+VMULPHZrrk 3376
+VMULPHZrrkz 3377
+VMULPSYrm 3378
+VMULPSYrr 3379
+VMULPSZ 3380
+VMULPSZrm 3381
+VMULPSZrmb 3382
+VMULPSZrmbk 3383
+VMULPSZrmbkz 3384
+VMULPSZrmk 3385
+VMULPSZrmkz 3386
+VMULPSZrr 3387
+VMULPSZrrb 3388
+VMULPSZrrbk 3389
+VMULPSZrrbkz 3390
+VMULPSZrrk 3391
+VMULPSZrrkz 3392
+VMULPSrm 3393
+VMULPSrr 3394
+VMULSDZrm 3395
+VMULSDZrm_Int 3396
+VMULSDZrmk_Int 3397
+VMULSDZrmkz_Int 3398
+VMULSDZrr 3399
+VMULSDZrr_Int 3400
+VMULSDZrrb_Int 3401
+VMULSDZrrbk_Int 3402
+VMULSDZrrbkz_Int 3403
+VMULSDZrrk_Int 3404
+VMULSDZrrkz_Int 3405
+VMULSDrm 3406
+VMULSDrm_Int 3407
+VMULSDrr 3408
+VMULSDrr_Int 3409
+VMULSHZrm 3410
+VMULSHZrm_Int 3411
+VMULSHZrmk_Int 3412
+VMULSHZrmkz_Int 3413
+VMULSHZrr 3414
+VMULSHZrr_Int 3415
+VMULSHZrrb_Int 3416
+VMULSHZrrbk_Int 3417
+VMULSHZrrbkz_Int 3418
+VMULSHZrrk_Int 3419
+VMULSHZrrkz_Int 3420
+VMULSSZrm 3421
+VMULSSZrm_Int 3422
+VMULSSZrmk_Int 3423
+VMULSSZrmkz_Int 3424
+VMULSSZrr 3425
+VMULSSZrr_Int 3426
+VMULSSZrrb_Int 3427
+VMULSSZrrbk_Int 3428
+VMULSSZrrbkz_Int 3429
+VMULSSZrrk_Int 3430
+VMULSSZrrkz_Int 3431
+VMULSSrm 3432
+VMULSSrm_Int 3433
+VMULSSrr 3434
+VMULSSrr_Int 3435
+VMWRITE 3436
+VMXOFF 3437
+VMXON 3438
+VORPDYrm 3439
+VORPDYrr 3440
+VORPDZ 3441
+VORPDZrm 3442
+VORPDZrmb 3443
+VORPDZrmbk 3444
+VORPDZrmbkz 3445
+VORPDZrmk 3446
+VORPDZrmkz 3447
+VORPDZrr 3448
+VORPDZrrk 3449
+VORPDZrrkz 3450
+VORPDrm 3451
+VORPDrr 3452
+VORPSYrm 3453
+VORPSYrr 3454
+VORPSZ 3455
+VORPSZrm 3456
+VORPSZrmb 3457
+VORPSZrmbk 3458
+VORPSZrmbkz 3459
+VORPSZrmk 3460
+VORPSZrmkz 3461
+VORPSZrr 3462
+VORPSZrrk 3463
+VORPSZrrkz 3464
+VORPSrm 3465
+VORPSrr 3466
+VP 3467
+VPABSBYrm 3468
+VPABSBYrr 3469
+VPABSBZ 3470
+VPABSBZrm 3471
+VPABSBZrmk 3472
+VPABSBZrmkz 3473
+VPABSBZrr 3474
+VPABSBZrrk 3475
+VPABSBZrrkz 3476
+VPABSBrm 3477
+VPABSBrr 3478
+VPABSDYrm 3479
+VPABSDYrr 3480
+VPABSDZ 3481
+VPABSDZrm 3482
+VPABSDZrmb 3483
+VPABSDZrmbk 3484
+VPABSDZrmbkz 3485
+VPABSDZrmk 3486
+VPABSDZrmkz 3487
+VPABSDZrr 3488
+VPABSDZrrk 3489
+VPABSDZrrkz 3490
+VPABSDrm 3491
+VPABSDrr 3492
+VPABSQZ 3493
+VPABSQZrm 3494
+VPABSQZrmb 3495
+VPABSQZrmbk 3496
+VPABSQZrmbkz 3497
+VPABSQZrmk 3498
+VPABSQZrmkz 3499
+VPABSQZrr 3500
+VPABSQZrrk 3501
+VPABSQZrrkz 3502
+VPABSWYrm 3503
+VPABSWYrr 3504
+VPABSWZ 3505
+VPABSWZrm 3506
+VPABSWZrmk 3507
+VPABSWZrmkz 3508
+VPABSWZrr 3509
+VPABSWZrrk 3510
+VPABSWZrrkz 3511
+VPABSWrm 3512
+VPABSWrr 3513
+VPACKSSDWYrm 3514
+VPACKSSDWYrr 3515
+VPACKSSDWZ 3516
+VPACKSSDWZrm 3517
+VPACKSSDWZrmb 3518
+VPACKSSDWZrmbk 3519
+VPACKSSDWZrmbkz 3520
+VPACKSSDWZrmk 3521
+VPACKSSDWZrmkz 3522
+VPACKSSDWZrr 3523
+VPACKSSDWZrrk 3524
+VPACKSSDWZrrkz 3525
+VPACKSSDWrm 3526
+VPACKSSDWrr 3527
+VPACKSSWBYrm 3528
+VPACKSSWBYrr 3529
+VPACKSSWBZ 3530
+VPACKSSWBZrm 3531
+VPACKSSWBZrmk 3532
+VPACKSSWBZrmkz 3533
+VPACKSSWBZrr 3534
+VPACKSSWBZrrk 3535
+VPACKSSWBZrrkz 3536
+VPACKSSWBrm 3537
+VPACKSSWBrr 3538
+VPACKUSDWYrm 3539
+VPACKUSDWYrr 3540
+VPACKUSDWZ 3541
+VPACKUSDWZrm 3542
+VPACKUSDWZrmb 3543
+VPACKUSDWZrmbk 3544
+VPACKUSDWZrmbkz 3545
+VPACKUSDWZrmk 3546
+VPACKUSDWZrmkz 3547
+VPACKUSDWZrr 3548
+VPACKUSDWZrrk 3549
+VPACKUSDWZrrkz 3550
+VPACKUSDWrm 3551
+VPACKUSDWrr 3552
+VPACKUSWBYrm 3553
+VPACKUSWBYrr 3554
+VPACKUSWBZ 3555
+VPACKUSWBZrm 3556
+VPACKUSWBZrmk 3557
+VPACKUSWBZrmkz 3558
+VPACKUSWBZrr 3559
+VPACKUSWBZrrk 3560
+VPACKUSWBZrrkz 3561
+VPACKUSWBrm 3562
+VPACKUSWBrr 3563
+VPADDBYrm 3564
+VPADDBYrr 3565
+VPADDBZ 3566
+VPADDBZrm 3567
+VPADDBZrmk 3568
+VPADDBZrmkz 3569
+VPADDBZrr 3570
+VPADDBZrrk 3571
+VPADDBZrrkz 3572
+VPADDBrm 3573
+VPADDBrr 3574
+VPADDDYrm 3575
+VPADDDYrr 3576
+VPADDDZ 3577
+VPADDDZrm 3578
+VPADDDZrmb 3579
+VPADDDZrmbk 3580
+VPADDDZrmbkz 3581
+VPADDDZrmk 3582
+VPADDDZrmkz 3583
+VPADDDZrr 3584
+VPADDDZrrk 3585
+VPADDDZrrkz 3586
+VPADDDrm 3587
+VPADDDrr 3588
+VPADDQYrm 3589
+VPADDQYrr 3590
+VPADDQZ 3591
+VPADDQZrm 3592
+VPADDQZrmb 3593
+VPADDQZrmbk 3594
+VPADDQZrmbkz 3595
+VPADDQZrmk 3596
+VPADDQZrmkz 3597
+VPADDQZrr 3598
+VPADDQZrrk 3599
+VPADDQZrrkz 3600
+VPADDQrm 3601
+VPADDQrr 3602
+VPADDSBYrm 3603
+VPADDSBYrr 3604
+VPADDSBZ 3605
+VPADDSBZrm 3606
+VPADDSBZrmk 3607
+VPADDSBZrmkz 3608
+VPADDSBZrr 3609
+VPADDSBZrrk 3610
+VPADDSBZrrkz 3611
+VPADDSBrm 3612
+VPADDSBrr 3613
+VPADDSWYrm 3614
+VPADDSWYrr 3615
+VPADDSWZ 3616
+VPADDSWZrm 3617
+VPADDSWZrmk 3618
+VPADDSWZrmkz 3619
+VPADDSWZrr 3620
+VPADDSWZrrk 3621
+VPADDSWZrrkz 3622
+VPADDSWrm 3623
+VPADDSWrr 3624
+VPADDUSBYrm 3625
+VPADDUSBYrr 3626
+VPADDUSBZ 3627
+VPADDUSBZrm 3628
+VPADDUSBZrmk 3629
+VPADDUSBZrmkz 3630
+VPADDUSBZrr 3631
+VPADDUSBZrrk 3632
+VPADDUSBZrrkz 3633
+VPADDUSBrm 3634
+VPADDUSBrr 3635
+VPADDUSWYrm 3636
+VPADDUSWYrr 3637
+VPADDUSWZ 3638
+VPADDUSWZrm 3639
+VPADDUSWZrmk 3640
+VPADDUSWZrmkz 3641
+VPADDUSWZrr 3642
+VPADDUSWZrrk 3643
+VPADDUSWZrrkz 3644
+VPADDUSWrm 3645
+VPADDUSWrr 3646
+VPADDWYrm 3647
+VPADDWYrr 3648
+VPADDWZ 3649
+VPADDWZrm 3650
+VPADDWZrmk 3651
+VPADDWZrmkz 3652
+VPADDWZrr 3653
+VPADDWZrrk 3654
+VPADDWZrrkz 3655
+VPADDWrm 3656
+VPADDWrr 3657
+VPALIGNRYrmi 3658
+VPALIGNRYrri 3659
+VPALIGNRZ 3660
+VPALIGNRZrmi 3661
+VPALIGNRZrmik 3662
+VPALIGNRZrmikz 3663
+VPALIGNRZrri 3664
+VPALIGNRZrrik 3665
+VPALIGNRZrrikz 3666
+VPALIGNRrmi 3667
+VPALIGNRrri 3668
+VPANDDZ 3669
+VPANDDZrm 3670
+VPANDDZrmb 3671
+VPANDDZrmbk 3672
+VPANDDZrmbkz 3673
+VPANDDZrmk 3674
+VPANDDZrmkz 3675
+VPANDDZrr 3676
+VPANDDZrrk 3677
+VPANDDZrrkz 3678
+VPANDNDZ 3679
+VPANDNDZrm 3680
+VPANDNDZrmb 3681
+VPANDNDZrmbk 3682
+VPANDNDZrmbkz 3683
+VPANDNDZrmk 3684
+VPANDNDZrmkz 3685
+VPANDNDZrr 3686
+VPANDNDZrrk 3687
+VPANDNDZrrkz 3688
+VPANDNQZ 3689
+VPANDNQZrm 3690
+VPANDNQZrmb 3691
+VPANDNQZrmbk 3692
+VPANDNQZrmbkz 3693
+VPANDNQZrmk 3694
+VPANDNQZrmkz 3695
+VPANDNQZrr 3696
+VPANDNQZrrk 3697
+VPANDNQZrrkz 3698
+VPANDNYrm 3699
+VPANDNYrr 3700
+VPANDNrm 3701
+VPANDNrr 3702
+VPANDQZ 3703
+VPANDQZrm 3704
+VPANDQZrmb 3705
+VPANDQZrmbk 3706
+VPANDQZrmbkz 3707
+VPANDQZrmk 3708
+VPANDQZrmkz 3709
+VPANDQZrr 3710
+VPANDQZrrk 3711
+VPANDQZrrkz 3712
+VPANDYrm 3713
+VPANDYrr 3714
+VPANDrm 3715
+VPANDrr 3716
+VPAVGBYrm 3717
+VPAVGBYrr 3718
+VPAVGBZ 3719
+VPAVGBZrm 3720
+VPAVGBZrmk 3721
+VPAVGBZrmkz 3722
+VPAVGBZrr 3723
+VPAVGBZrrk 3724
+VPAVGBZrrkz 3725
+VPAVGBrm 3726
+VPAVGBrr 3727
+VPAVGWYrm 3728
+VPAVGWYrr 3729
+VPAVGWZ 3730
+VPAVGWZrm 3731
+VPAVGWZrmk 3732
+VPAVGWZrmkz 3733
+VPAVGWZrr 3734
+VPAVGWZrrk 3735
+VPAVGWZrrkz 3736
+VPAVGWrm 3737
+VPAVGWrr 3738
+VPBLENDDYrmi 3739
+VPBLENDDYrri 3740
+VPBLENDDrmi 3741
+VPBLENDDrri 3742
+VPBLENDMBZ 3743
+VPBLENDMBZrm 3744
+VPBLENDMBZrmk 3745
+VPBLENDMBZrmkz 3746
+VPBLENDMBZrr 3747
+VPBLENDMBZrrk 3748
+VPBLENDMBZrrkz 3749
+VPBLENDMDZ 3750
+VPBLENDMDZrm 3751
+VPBLENDMDZrmb 3752
+VPBLENDMDZrmbk 3753
+VPBLENDMDZrmbkz 3754
+VPBLENDMDZrmk 3755
+VPBLENDMDZrmkz 3756
+VPBLENDMDZrr 3757
+VPBLENDMDZrrk 3758
+VPBLENDMDZrrkz 3759
+VPBLENDMQZ 3760
+VPBLENDMQZrm 3761
+VPBLENDMQZrmb 3762
+VPBLENDMQZrmbk 3763
+VPBLENDMQZrmbkz 3764
+VPBLENDMQZrmk 3765
+VPBLENDMQZrmkz 3766
+VPBLENDMQZrr 3767
+VPBLENDMQZrrk 3768
+VPBLENDMQZrrkz 3769
+VPBLENDMWZ 3770
+VPBLENDMWZrm 3771
+VPBLENDMWZrmk 3772
+VPBLENDMWZrmkz 3773
+VPBLENDMWZrr 3774
+VPBLENDMWZrrk 3775
+VPBLENDMWZrrkz 3776
+VPBLENDVBYrmr 3777
+VPBLENDVBYrrr 3778
+VPBLENDVBrmr 3779
+VPBLENDVBrrr 3780
+VPBLENDWYrmi 3781
+VPBLENDWYrri 3782
+VPBLENDWrmi 3783
+VPBLENDWrri 3784
+VPBROADCASTBYrm 3785
+VPBROADCASTBYrr 3786
+VPBROADCASTBZ 3787
+VPBROADCASTBZrm 3788
+VPBROADCASTBZrmk 3789
+VPBROADCASTBZrmkz 3790
+VPBROADCASTBZrr 3791
+VPBROADCASTBZrrk 3792
+VPBROADCASTBZrrkz 3793
+VPBROADCASTBrZ 3794
+VPBROADCASTBrZrr 3795
+VPBROADCASTBrZrrk 3796
+VPBROADCASTBrZrrkz 3797
+VPBROADCASTBrm 3798
+VPBROADCASTBrr 3799
+VPBROADCASTDYrm 3800
+VPBROADCASTDYrr 3801
+VPBROADCASTDZ 3802
+VPBROADCASTDZrm 3803
+VPBROADCASTDZrmk 3804
+VPBROADCASTDZrmkz 3805
+VPBROADCASTDZrr 3806
+VPBROADCASTDZrrk 3807
+VPBROADCASTDZrrkz 3808
+VPBROADCASTDrZ 3809
+VPBROADCASTDrZrr 3810
+VPBROADCASTDrZrrk 3811
+VPBROADCASTDrZrrkz 3812
+VPBROADCASTDrm 3813
+VPBROADCASTDrr 3814
+VPBROADCASTMB 3815
+VPBROADCASTMW 3816
+VPBROADCASTQYrm 3817
+VPBROADCASTQYrr 3818
+VPBROADCASTQZ 3819
+VPBROADCASTQZrm 3820
+VPBROADCASTQZrmk 3821
+VPBROADCASTQZrmkz 3822
+VPBROADCASTQZrr 3823
+VPBROADCASTQZrrk 3824
+VPBROADCASTQZrrkz 3825
+VPBROADCASTQrZ 3826
+VPBROADCASTQrZrr 3827
+VPBROADCASTQrZrrk 3828
+VPBROADCASTQrZrrkz 3829
+VPBROADCASTQrm 3830
+VPBROADCASTQrr 3831
+VPBROADCASTWYrm 3832
+VPBROADCASTWYrr 3833
+VPBROADCASTWZ 3834
+VPBROADCASTWZrm 3835
+VPBROADCASTWZrmk 3836
+VPBROADCASTWZrmkz 3837
+VPBROADCASTWZrr 3838
+VPBROADCASTWZrrk 3839
+VPBROADCASTWZrrkz 3840
+VPBROADCASTWrZ 3841
+VPBROADCASTWrZrr 3842
+VPBROADCASTWrZrrk 3843
+VPBROADCASTWrZrrkz 3844
+VPBROADCASTWrm 3845
+VPBROADCASTWrr 3846
+VPCLMULQDQYrmi 3847
+VPCLMULQDQYrri 3848
+VPCLMULQDQZ 3849
+VPCLMULQDQZrmi 3850
+VPCLMULQDQZrri 3851
+VPCLMULQDQrmi 3852
+VPCLMULQDQrri 3853
+VPCMOVYrmr 3854
+VPCMOVYrrm 3855
+VPCMOVYrrr 3856
+VPCMOVYrrr_REV 3857
+VPCMOVrmr 3858
+VPCMOVrrm 3859
+VPCMOVrrr 3860
+VPCMOVrrr_REV 3861
+VPCMPBZ 3862
+VPCMPBZrmi 3863
+VPCMPBZrmik 3864
+VPCMPBZrri 3865
+VPCMPBZrrik 3866
+VPCMPDZ 3867
+VPCMPDZrmbi 3868
+VPCMPDZrmbik 3869
+VPCMPDZrmi 3870
+VPCMPDZrmik 3871
+VPCMPDZrri 3872
+VPCMPDZrrik 3873
+VPCMPEQBYrm 3874
+VPCMPEQBYrr 3875
+VPCMPEQBZ 3876
+VPCMPEQBZrm 3877
+VPCMPEQBZrmk 3878
+VPCMPEQBZrr 3879
+VPCMPEQBZrrk 3880
+VPCMPEQBrm 3881
+VPCMPEQBrr 3882
+VPCMPEQDYrm 3883
+VPCMPEQDYrr 3884
+VPCMPEQDZ 3885
+VPCMPEQDZrm 3886
+VPCMPEQDZrmb 3887
+VPCMPEQDZrmbk 3888
+VPCMPEQDZrmk 3889
+VPCMPEQDZrr 3890
+VPCMPEQDZrrk 3891
+VPCMPEQDrm 3892
+VPCMPEQDrr 3893
+VPCMPEQQYrm 3894
+VPCMPEQQYrr 3895
+VPCMPEQQZ 3896
+VPCMPEQQZrm 3897
+VPCMPEQQZrmb 3898
+VPCMPEQQZrmbk 3899
+VPCMPEQQZrmk 3900
+VPCMPEQQZrr 3901
+VPCMPEQQZrrk 3902
+VPCMPEQQrm 3903
+VPCMPEQQrr 3904
+VPCMPEQWYrm 3905
+VPCMPEQWYrr 3906
+VPCMPEQWZ 3907
+VPCMPEQWZrm 3908
+VPCMPEQWZrmk 3909
+VPCMPEQWZrr 3910
+VPCMPEQWZrrk 3911
+VPCMPEQWrm 3912
+VPCMPEQWrr 3913
+VPCMPESTRIrmi 3914
+VPCMPESTRIrri 3915
+VPCMPESTRMrmi 3916
+VPCMPESTRMrri 3917
+VPCMPGTBYrm 3918
+VPCMPGTBYrr 3919
+VPCMPGTBZ 3920
+VPCMPGTBZrm 3921
+VPCMPGTBZrmk 3922
+VPCMPGTBZrr 3923
+VPCMPGTBZrrk 3924
+VPCMPGTBrm 3925
+VPCMPGTBrr 3926
+VPCMPGTDYrm 3927
+VPCMPGTDYrr 3928
+VPCMPGTDZ 3929
+VPCMPGTDZrm 3930
+VPCMPGTDZrmb 3931
+VPCMPGTDZrmbk 3932
+VPCMPGTDZrmk 3933
+VPCMPGTDZrr 3934
+VPCMPGTDZrrk 3935
+VPCMPGTDrm 3936
+VPCMPGTDrr 3937
+VPCMPGTQYrm 3938
+VPCMPGTQYrr 3939
+VPCMPGTQZ 3940
+VPCMPGTQZrm 3941
+VPCMPGTQZrmb 3942
+VPCMPGTQZrmbk 3943
+VPCMPGTQZrmk 3944
+VPCMPGTQZrr 3945
+VPCMPGTQZrrk 3946
+VPCMPGTQrm 3947
+VPCMPGTQrr 3948
+VPCMPGTWYrm 3949
+VPCMPGTWYrr 3950
+VPCMPGTWZ 3951
+VPCMPGTWZrm 3952
+VPCMPGTWZrmk 3953
+VPCMPGTWZrr 3954
+VPCMPGTWZrrk 3955
+VPCMPGTWrm 3956
+VPCMPGTWrr 3957
+VPCMPISTRIrmi 3958
+VPCMPISTRIrri 3959
+VPCMPISTRMrmi 3960
+VPCMPISTRMrri 3961
+VPCMPQZ 3962
+VPCMPQZrmbi 3963
+VPCMPQZrmbik 3964
+VPCMPQZrmi 3965
+VPCMPQZrmik 3966
+VPCMPQZrri 3967
+VPCMPQZrrik 3968
+VPCMPUBZ 3969
+VPCMPUBZrmi 3970
+VPCMPUBZrmik 3971
+VPCMPUBZrri 3972
+VPCMPUBZrrik 3973
+VPCMPUDZ 3974
+VPCMPUDZrmbi 3975
+VPCMPUDZrmbik 3976
+VPCMPUDZrmi 3977
+VPCMPUDZrmik 3978
+VPCMPUDZrri 3979
+VPCMPUDZrrik 3980
+VPCMPUQZ 3981
+VPCMPUQZrmbi 3982
+VPCMPUQZrmbik 3983
+VPCMPUQZrmi 3984
+VPCMPUQZrmik 3985
+VPCMPUQZrri 3986
+VPCMPUQZrrik 3987
+VPCMPUWZ 3988
+VPCMPUWZrmi 3989
+VPCMPUWZrmik 3990
+VPCMPUWZrri 3991
+VPCMPUWZrrik 3992
+VPCMPWZ 3993
+VPCMPWZrmi 3994
+VPCMPWZrmik 3995
+VPCMPWZrri 3996
+VPCMPWZrrik 3997
+VPCOMBmi 3998
+VPCOMBri 3999
+VPCOMDmi 4000
+VPCOMDri 4001
+VPCOMPRESSBZ 4002
+VPCOMPRESSBZmr 4003
+VPCOMPRESSBZmrk 4004
+VPCOMPRESSBZrr 4005
+VPCOMPRESSBZrrk 4006
+VPCOMPRESSBZrrkz 4007
+VPCOMPRESSDZ 4008
+VPCOMPRESSDZmr 4009
+VPCOMPRESSDZmrk 4010
+VPCOMPRESSDZrr 4011
+VPCOMPRESSDZrrk 4012
+VPCOMPRESSDZrrkz 4013
+VPCOMPRESSQZ 4014
+VPCOMPRESSQZmr 4015
+VPCOMPRESSQZmrk 4016
+VPCOMPRESSQZrr 4017
+VPCOMPRESSQZrrk 4018
+VPCOMPRESSQZrrkz 4019
+VPCOMPRESSWZ 4020
+VPCOMPRESSWZmr 4021
+VPCOMPRESSWZmrk 4022
+VPCOMPRESSWZrr 4023
+VPCOMPRESSWZrrk 4024
+VPCOMPRESSWZrrkz 4025
+VPCOMQmi 4026
+VPCOMQri 4027
+VPCOMUBmi 4028
+VPCOMUBri 4029
+VPCOMUDmi 4030
+VPCOMUDri 4031
+VPCOMUQmi 4032
+VPCOMUQri 4033
+VPCOMUWmi 4034
+VPCOMUWri 4035
+VPCOMWmi 4036
+VPCOMWri 4037
+VPCONFLICTDZ 4038
+VPCONFLICTDZrm 4039
+VPCONFLICTDZrmb 4040
+VPCONFLICTDZrmbk 4041
+VPCONFLICTDZrmbkz 4042
+VPCONFLICTDZrmk 4043
+VPCONFLICTDZrmkz 4044
+VPCONFLICTDZrr 4045
+VPCONFLICTDZrrk 4046
+VPCONFLICTDZrrkz 4047
+VPCONFLICTQZ 4048
+VPCONFLICTQZrm 4049
+VPCONFLICTQZrmb 4050
+VPCONFLICTQZrmbk 4051
+VPCONFLICTQZrmbkz 4052
+VPCONFLICTQZrmk 4053
+VPCONFLICTQZrmkz 4054
+VPCONFLICTQZrr 4055
+VPCONFLICTQZrrk 4056
+VPCONFLICTQZrrkz 4057
+VPDPBSSDSYrm 4058
+VPDPBSSDSYrr 4059
+VPDPBSSDSZ 4060
+VPDPBSSDSZrm 4061
+VPDPBSSDSZrmb 4062
+VPDPBSSDSZrmbk 4063
+VPDPBSSDSZrmbkz 4064
+VPDPBSSDSZrmk 4065
+VPDPBSSDSZrmkz 4066
+VPDPBSSDSZrr 4067
+VPDPBSSDSZrrk 4068
+VPDPBSSDSZrrkz 4069
+VPDPBSSDSrm 4070
+VPDPBSSDSrr 4071
+VPDPBSSDYrm 4072
+VPDPBSSDYrr 4073
+VPDPBSSDZ 4074
+VPDPBSSDZrm 4075
+VPDPBSSDZrmb 4076
+VPDPBSSDZrmbk 4077
+VPDPBSSDZrmbkz 4078
+VPDPBSSDZrmk 4079
+VPDPBSSDZrmkz 4080
+VPDPBSSDZrr 4081
+VPDPBSSDZrrk 4082
+VPDPBSSDZrrkz 4083
+VPDPBSSDrm 4084
+VPDPBSSDrr 4085
+VPDPBSUDSYrm 4086
+VPDPBSUDSYrr 4087
+VPDPBSUDSZ 4088
+VPDPBSUDSZrm 4089
+VPDPBSUDSZrmb 4090
+VPDPBSUDSZrmbk 4091
+VPDPBSUDSZrmbkz 4092
+VPDPBSUDSZrmk 4093
+VPDPBSUDSZrmkz 4094
+VPDPBSUDSZrr 4095
+VPDPBSUDSZrrk 4096
+VPDPBSUDSZrrkz 4097
+VPDPBSUDSrm 4098
+VPDPBSUDSrr 4099
+VPDPBSUDYrm 4100
+VPDPBSUDYrr 4101
+VPDPBSUDZ 4102
+VPDPBSUDZrm 4103
+VPDPBSUDZrmb 4104
+VPDPBSUDZrmbk 4105
+VPDPBSUDZrmbkz 4106
+VPDPBSUDZrmk 4107
+VPDPBSUDZrmkz 4108
+VPDPBSUDZrr 4109
+VPDPBSUDZrrk 4110
+VPDPBSUDZrrkz 4111
+VPDPBSUDrm 4112
+VPDPBSUDrr 4113
+VPDPBUSDSYrm 4114
+VPDPBUSDSYrr 4115
+VPDPBUSDSZ 4116
+VPDPBUSDSZrm 4117
+VPDPBUSDSZrmb 4118
+VPDPBUSDSZrmbk 4119
+VPDPBUSDSZrmbkz 4120
+VPDPBUSDSZrmk 4121
+VPDPBUSDSZrmkz 4122
+VPDPBUSDSZrr 4123
+VPDPBUSDSZrrk 4124
+VPDPBUSDSZrrkz 4125
+VPDPBUSDSrm 4126
+VPDPBUSDSrr 4127
+VPDPBUSDYrm 4128
+VPDPBUSDYrr 4129
+VPDPBUSDZ 4130
+VPDPBUSDZrm 4131
+VPDPBUSDZrmb 4132
+VPDPBUSDZrmbk 4133
+VPDPBUSDZrmbkz 4134
+VPDPBUSDZrmk 4135
+VPDPBUSDZrmkz 4136
+VPDPBUSDZrr 4137
+VPDPBUSDZrrk 4138
+VPDPBUSDZrrkz 4139
+VPDPBUSDrm 4140
+VPDPBUSDrr 4141
+VPDPBUUDSYrm 4142
+VPDPBUUDSYrr 4143
+VPDPBUUDSZ 4144
+VPDPBUUDSZrm 4145
+VPDPBUUDSZrmb 4146
+VPDPBUUDSZrmbk 4147
+VPDPBUUDSZrmbkz 4148
+VPDPBUUDSZrmk 4149
+VPDPBUUDSZrmkz 4150
+VPDPBUUDSZrr 4151
+VPDPBUUDSZrrk 4152
+VPDPBUUDSZrrkz 4153
+VPDPBUUDSrm 4154
+VPDPBUUDSrr 4155
+VPDPBUUDYrm 4156
+VPDPBUUDYrr 4157
+VPDPBUUDZ 4158
+VPDPBUUDZrm 4159
+VPDPBUUDZrmb 4160
+VPDPBUUDZrmbk 4161
+VPDPBUUDZrmbkz 4162
+VPDPBUUDZrmk 4163
+VPDPBUUDZrmkz 4164
+VPDPBUUDZrr 4165
+VPDPBUUDZrrk 4166
+VPDPBUUDZrrkz 4167
+VPDPBUUDrm 4168
+VPDPBUUDrr 4169
+VPDPWSSDSYrm 4170
+VPDPWSSDSYrr 4171
+VPDPWSSDSZ 4172
+VPDPWSSDSZrm 4173
+VPDPWSSDSZrmb 4174
+VPDPWSSDSZrmbk 4175
+VPDPWSSDSZrmbkz 4176
+VPDPWSSDSZrmk 4177
+VPDPWSSDSZrmkz 4178
+VPDPWSSDSZrr 4179
+VPDPWSSDSZrrk 4180
+VPDPWSSDSZrrkz 4181
+VPDPWSSDSrm 4182
+VPDPWSSDSrr 4183
+VPDPWSSDYrm 4184
+VPDPWSSDYrr 4185
+VPDPWSSDZ 4186
+VPDPWSSDZrm 4187
+VPDPWSSDZrmb 4188
+VPDPWSSDZrmbk 4189
+VPDPWSSDZrmbkz 4190
+VPDPWSSDZrmk 4191
+VPDPWSSDZrmkz 4192
+VPDPWSSDZrr 4193
+VPDPWSSDZrrk 4194
+VPDPWSSDZrrkz 4195
+VPDPWSSDrm 4196
+VPDPWSSDrr 4197
+VPDPWSUDSYrm 4198
+VPDPWSUDSYrr 4199
+VPDPWSUDSZ 4200
+VPDPWSUDSZrm 4201
+VPDPWSUDSZrmb 4202
+VPDPWSUDSZrmbk 4203
+VPDPWSUDSZrmbkz 4204
+VPDPWSUDSZrmk 4205
+VPDPWSUDSZrmkz 4206
+VPDPWSUDSZrr 4207
+VPDPWSUDSZrrk 4208
+VPDPWSUDSZrrkz 4209
+VPDPWSUDSrm 4210
+VPDPWSUDSrr 4211
+VPDPWSUDYrm 4212
+VPDPWSUDYrr 4213
+VPDPWSUDZ 4214
+VPDPWSUDZrm 4215
+VPDPWSUDZrmb 4216
+VPDPWSUDZrmbk 4217
+VPDPWSUDZrmbkz 4218
+VPDPWSUDZrmk 4219
+VPDPWSUDZrmkz 4220
+VPDPWSUDZrr 4221
+VPDPWSUDZrrk 4222
+VPDPWSUDZrrkz 4223
+VPDPWSUDrm 4224
+VPDPWSUDrr 4225
+VPDPWUSDSYrm 4226
+VPDPWUSDSYrr 4227
+VPDPWUSDSZ 4228
+VPDPWUSDSZrm 4229
+VPDPWUSDSZrmb 4230
+VPDPWUSDSZrmbk 4231
+VPDPWUSDSZrmbkz 4232
+VPDPWUSDSZrmk 4233
+VPDPWUSDSZrmkz 4234
+VPDPWUSDSZrr 4235
+VPDPWUSDSZrrk 4236
+VPDPWUSDSZrrkz 4237
+VPDPWUSDSrm 4238
+VPDPWUSDSrr 4239
+VPDPWUSDYrm 4240
+VPDPWUSDYrr 4241
+VPDPWUSDZ 4242
+VPDPWUSDZrm 4243
+VPDPWUSDZrmb 4244
+VPDPWUSDZrmbk 4245
+VPDPWUSDZrmbkz 4246
+VPDPWUSDZrmk 4247
+VPDPWUSDZrmkz 4248
+VPDPWUSDZrr 4249
+VPDPWUSDZrrk 4250
+VPDPWUSDZrrkz 4251
+VPDPWUSDrm 4252
+VPDPWUSDrr 4253
+VPDPWUUDSYrm 4254
+VPDPWUUDSYrr 4255
+VPDPWUUDSZ 4256
+VPDPWUUDSZrm 4257
+VPDPWUUDSZrmb 4258
+VPDPWUUDSZrmbk 4259
+VPDPWUUDSZrmbkz 4260
+VPDPWUUDSZrmk 4261
+VPDPWUUDSZrmkz 4262
+VPDPWUUDSZrr 4263
+VPDPWUUDSZrrk 4264
+VPDPWUUDSZrrkz 4265
+VPDPWUUDSrm 4266
+VPDPWUUDSrr 4267
+VPDPWUUDYrm 4268
+VPDPWUUDYrr 4269
+VPDPWUUDZ 4270
+VPDPWUUDZrm 4271
+VPDPWUUDZrmb 4272
+VPDPWUUDZrmbk 4273
+VPDPWUUDZrmbkz 4274
+VPDPWUUDZrmk 4275
+VPDPWUUDZrmkz 4276
+VPDPWUUDZrr 4277
+VPDPWUUDZrrk 4278
+VPDPWUUDZrrkz 4279
+VPDPWUUDrm 4280
+VPDPWUUDrr 4281
+VPERM 4282
+VPERMBZ 4283
+VPERMBZrm 4284
+VPERMBZrmk 4285
+VPERMBZrmkz 4286
+VPERMBZrr 4287
+VPERMBZrrk 4288
+VPERMBZrrkz 4289
+VPERMDYrm 4290
+VPERMDYrr 4291
+VPERMDZ 4292
+VPERMDZrm 4293
+VPERMDZrmb 4294
+VPERMDZrmbk 4295
+VPERMDZrmbkz 4296
+VPERMDZrmk 4297
+VPERMDZrmkz 4298
+VPERMDZrr 4299
+VPERMDZrrk 4300
+VPERMDZrrkz 4301
+VPERMI 4302
+VPERMIL 4303
+VPERMILPDYmi 4304
+VPERMILPDYri 4305
+VPERMILPDYrm 4306
+VPERMILPDYrr 4307
+VPERMILPDZ 4308
+VPERMILPDZmbi 4309
+VPERMILPDZmbik 4310
+VPERMILPDZmbikz 4311
+VPERMILPDZmi 4312
+VPERMILPDZmik 4313
+VPERMILPDZmikz 4314
+VPERMILPDZri 4315
+VPERMILPDZrik 4316
+VPERMILPDZrikz 4317
+VPERMILPDZrm 4318
+VPERMILPDZrmb 4319
+VPERMILPDZrmbk 4320
+VPERMILPDZrmbkz 4321
+VPERMILPDZrmk 4322
+VPERMILPDZrmkz 4323
+VPERMILPDZrr 4324
+VPERMILPDZrrk 4325
+VPERMILPDZrrkz 4326
+VPERMILPDmi 4327
+VPERMILPDri 4328
+VPERMILPDrm 4329
+VPERMILPDrr 4330
+VPERMILPSYmi 4331
+VPERMILPSYri 4332
+VPERMILPSYrm 4333
+VPERMILPSYrr 4334
+VPERMILPSZ 4335
+VPERMILPSZmbi 4336
+VPERMILPSZmbik 4337
+VPERMILPSZmbikz 4338
+VPERMILPSZmi 4339
+VPERMILPSZmik 4340
+VPERMILPSZmikz 4341
+VPERMILPSZri 4342
+VPERMILPSZrik 4343
+VPERMILPSZrikz 4344
+VPERMILPSZrm 4345
+VPERMILPSZrmb 4346
+VPERMILPSZrmbk 4347
+VPERMILPSZrmbkz 4348
+VPERMILPSZrmk 4349
+VPERMILPSZrmkz 4350
+VPERMILPSZrr 4351
+VPERMILPSZrrk 4352
+VPERMILPSZrrkz 4353
+VPERMILPSmi 4354
+VPERMILPSri 4355
+VPERMILPSrm 4356
+VPERMILPSrr 4357
+VPERMPDYmi 4358
+VPERMPDYri 4359
+VPERMPDZ 4360
+VPERMPDZmbi 4361
+VPERMPDZmbik 4362
+VPERMPDZmbikz 4363
+VPERMPDZmi 4364
+VPERMPDZmik 4365
+VPERMPDZmikz 4366
+VPERMPDZri 4367
+VPERMPDZrik 4368
+VPERMPDZrikz 4369
+VPERMPDZrm 4370
+VPERMPDZrmb 4371
+VPERMPDZrmbk 4372
+VPERMPDZrmbkz 4373
+VPERMPDZrmk 4374
+VPERMPDZrmkz 4375
+VPERMPDZrr 4376
+VPERMPDZrrk 4377
+VPERMPDZrrkz 4378
+VPERMPSYrm 4379
+VPERMPSYrr 4380
+VPERMPSZ 4381
+VPERMPSZrm 4382
+VPERMPSZrmb 4383
+VPERMPSZrmbk 4384
+VPERMPSZrmbkz 4385
+VPERMPSZrmk 4386
+VPERMPSZrmkz 4387
+VPERMPSZrr 4388
+VPERMPSZrrk 4389
+VPERMPSZrrkz 4390
+VPERMQYmi 4391
+VPERMQYri 4392
+VPERMQZ 4393
+VPERMQZmbi 4394
+VPERMQZmbik 4395
+VPERMQZmbikz 4396
+VPERMQZmi 4397
+VPERMQZmik 4398
+VPERMQZmikz 4399
+VPERMQZri 4400
+VPERMQZrik 4401
+VPERMQZrikz 4402
+VPERMQZrm 4403
+VPERMQZrmb 4404
+VPERMQZrmbk 4405
+VPERMQZrmbkz 4406
+VPERMQZrmk 4407
+VPERMQZrmkz 4408
+VPERMQZrr 4409
+VPERMQZrrk 4410
+VPERMQZrrkz 4411
+VPERMT 4412
+VPERMWZ 4413
+VPERMWZrm 4414
+VPERMWZrmk 4415
+VPERMWZrmkz 4416
+VPERMWZrr 4417
+VPERMWZrrk 4418
+VPERMWZrrkz 4419
+VPEXPANDBZ 4420
+VPEXPANDBZrm 4421
+VPEXPANDBZrmk 4422
+VPEXPANDBZrmkz 4423
+VPEXPANDBZrr 4424
+VPEXPANDBZrrk 4425
+VPEXPANDBZrrkz 4426
+VPEXPANDDZ 4427
+VPEXPANDDZrm 4428
+VPEXPANDDZrmk 4429
+VPEXPANDDZrmkz 4430
+VPEXPANDDZrr 4431
+VPEXPANDDZrrk 4432
+VPEXPANDDZrrkz 4433
+VPEXPANDQZ 4434
+VPEXPANDQZrm 4435
+VPEXPANDQZrmk 4436
+VPEXPANDQZrmkz 4437
+VPEXPANDQZrr 4438
+VPEXPANDQZrrk 4439
+VPEXPANDQZrrkz 4440
+VPEXPANDWZ 4441
+VPEXPANDWZrm 4442
+VPEXPANDWZrmk 4443
+VPEXPANDWZrmkz 4444
+VPEXPANDWZrr 4445
+VPEXPANDWZrrk 4446
+VPEXPANDWZrrkz 4447
+VPEXTRBZmri 4448
+VPEXTRBZrri 4449
+VPEXTRBmri 4450
+VPEXTRBrri 4451
+VPEXTRDZmri 4452
+VPEXTRDZrri 4453
+VPEXTRDmri 4454
+VPEXTRDrri 4455
+VPEXTRQZmri 4456
+VPEXTRQZrri 4457
+VPEXTRQmri 4458
+VPEXTRQrri 4459
+VPEXTRWZmri 4460
+VPEXTRWZrri 4461
+VPEXTRWZrri_REV 4462
+VPEXTRWmri 4463
+VPEXTRWrri 4464
+VPEXTRWrri_REV 4465
+VPGATHERDDYrm 4466
+VPGATHERDDZ 4467
+VPGATHERDDZrm 4468
+VPGATHERDDrm 4469
+VPGATHERDQYrm 4470
+VPGATHERDQZ 4471
+VPGATHERDQZrm 4472
+VPGATHERDQrm 4473
+VPGATHERQDYrm 4474
+VPGATHERQDZ 4475
+VPGATHERQDZrm 4476
+VPGATHERQDrm 4477
+VPGATHERQQYrm 4478
+VPGATHERQQZ 4479
+VPGATHERQQZrm 4480
+VPGATHERQQrm 4481
+VPHADDBDrm 4482
+VPHADDBDrr 4483
+VPHADDBQrm 4484
+VPHADDBQrr 4485
+VPHADDBWrm 4486
+VPHADDBWrr 4487
+VPHADDDQrm 4488
+VPHADDDQrr 4489
+VPHADDDYrm 4490
+VPHADDDYrr 4491
+VPHADDDrm 4492
+VPHADDDrr 4493
+VPHADDSWYrm 4494
+VPHADDSWYrr 4495
+VPHADDSWrm 4496
+VPHADDSWrr 4497
+VPHADDUBDrm 4498
+VPHADDUBDrr 4499
+VPHADDUBQrm 4500
+VPHADDUBQrr 4501
+VPHADDUBWrm 4502
+VPHADDUBWrr 4503
+VPHADDUDQrm 4504
+VPHADDUDQrr 4505
+VPHADDUWDrm 4506
+VPHADDUWDrr 4507
+VPHADDUWQrm 4508
+VPHADDUWQrr 4509
+VPHADDWDrm 4510
+VPHADDWDrr 4511
+VPHADDWQrm 4512
+VPHADDWQrr 4513
+VPHADDWYrm 4514
+VPHADDWYrr 4515
+VPHADDWrm 4516
+VPHADDWrr 4517
+VPHMINPOSUWrm 4518
+VPHMINPOSUWrr 4519
+VPHSUBBWrm 4520
+VPHSUBBWrr 4521
+VPHSUBDQrm 4522
+VPHSUBDQrr 4523
+VPHSUBDYrm 4524
+VPHSUBDYrr 4525
+VPHSUBDrm 4526
+VPHSUBDrr 4527
+VPHSUBSWYrm 4528
+VPHSUBSWYrr 4529
+VPHSUBSWrm 4530
+VPHSUBSWrr 4531
+VPHSUBWDrm 4532
+VPHSUBWDrr 4533
+VPHSUBWYrm 4534
+VPHSUBWYrr 4535
+VPHSUBWrm 4536
+VPHSUBWrr 4537
+VPINSRBZrmi 4538
+VPINSRBZrri 4539
+VPINSRBrmi 4540
+VPINSRBrri 4541
+VPINSRDZrmi 4542
+VPINSRDZrri 4543
+VPINSRDrmi 4544
+VPINSRDrri 4545
+VPINSRQZrmi 4546
+VPINSRQZrri 4547
+VPINSRQrmi 4548
+VPINSRQrri 4549
+VPINSRWZrmi 4550
+VPINSRWZrri 4551
+VPINSRWrmi 4552
+VPINSRWrri 4553
+VPLZCNTDZ 4554
+VPLZCNTDZrm 4555
+VPLZCNTDZrmb 4556
+VPLZCNTDZrmbk 4557
+VPLZCNTDZrmbkz 4558
+VPLZCNTDZrmk 4559
+VPLZCNTDZrmkz 4560
+VPLZCNTDZrr 4561
+VPLZCNTDZrrk 4562
+VPLZCNTDZrrkz 4563
+VPLZCNTQZ 4564
+VPLZCNTQZrm 4565
+VPLZCNTQZrmb 4566
+VPLZCNTQZrmbk 4567
+VPLZCNTQZrmbkz 4568
+VPLZCNTQZrmk 4569
+VPLZCNTQZrmkz 4570
+VPLZCNTQZrr 4571
+VPLZCNTQZrrk 4572
+VPLZCNTQZrrkz 4573
+VPMACSDDrm 4574
+VPMACSDDrr 4575
+VPMACSDQHrm 4576
+VPMACSDQHrr 4577
+VPMACSDQLrm 4578
+VPMACSDQLrr 4579
+VPMACSSDDrm 4580
+VPMACSSDDrr 4581
+VPMACSSDQHrm 4582
+VPMACSSDQHrr 4583
+VPMACSSDQLrm 4584
+VPMACSSDQLrr 4585
+VPMACSSWDrm 4586
+VPMACSSWDrr 4587
+VPMACSSWWrm 4588
+VPMACSSWWrr 4589
+VPMACSWDrm 4590
+VPMACSWDrr 4591
+VPMACSWWrm 4592
+VPMACSWWrr 4593
+VPMADCSSWDrm 4594
+VPMADCSSWDrr 4595
+VPMADCSWDrm 4596
+VPMADCSWDrr 4597
+VPMADD 4598
+VPMADDUBSWYrm 4599
+VPMADDUBSWYrr 4600
+VPMADDUBSWZ 4601
+VPMADDUBSWZrm 4602
+VPMADDUBSWZrmk 4603
+VPMADDUBSWZrmkz 4604
+VPMADDUBSWZrr 4605
+VPMADDUBSWZrrk 4606
+VPMADDUBSWZrrkz 4607
+VPMADDUBSWrm 4608
+VPMADDUBSWrr 4609
+VPMADDWDYrm 4610
+VPMADDWDYrr 4611
+VPMADDWDZ 4612
+VPMADDWDZrm 4613
+VPMADDWDZrmk 4614
+VPMADDWDZrmkz 4615
+VPMADDWDZrr 4616
+VPMADDWDZrrk 4617
+VPMADDWDZrrkz 4618
+VPMADDWDrm 4619
+VPMADDWDrr 4620
+VPMASKMOVDYmr 4621
+VPMASKMOVDYrm 4622
+VPMASKMOVDmr 4623
+VPMASKMOVDrm 4624
+VPMASKMOVQYmr 4625
+VPMASKMOVQYrm 4626
+VPMASKMOVQmr 4627
+VPMASKMOVQrm 4628
+VPMAXSBYrm 4629
+VPMAXSBYrr 4630
+VPMAXSBZ 4631
+VPMAXSBZrm 4632
+VPMAXSBZrmk 4633
+VPMAXSBZrmkz 4634
+VPMAXSBZrr 4635
+VPMAXSBZrrk 4636
+VPMAXSBZrrkz 4637
+VPMAXSBrm 4638
+VPMAXSBrr 4639
+VPMAXSDYrm 4640
+VPMAXSDYrr 4641
+VPMAXSDZ 4642
+VPMAXSDZrm 4643
+VPMAXSDZrmb 4644
+VPMAXSDZrmbk 4645
+VPMAXSDZrmbkz 4646
+VPMAXSDZrmk 4647
+VPMAXSDZrmkz 4648
+VPMAXSDZrr 4649
+VPMAXSDZrrk 4650
+VPMAXSDZrrkz 4651
+VPMAXSDrm 4652
+VPMAXSDrr 4653
+VPMAXSQZ 4654
+VPMAXSQZrm 4655
+VPMAXSQZrmb 4656
+VPMAXSQZrmbk 4657
+VPMAXSQZrmbkz 4658
+VPMAXSQZrmk 4659
+VPMAXSQZrmkz 4660
+VPMAXSQZrr 4661
+VPMAXSQZrrk 4662
+VPMAXSQZrrkz 4663
+VPMAXSWYrm 4664
+VPMAXSWYrr 4665
+VPMAXSWZ 4666
+VPMAXSWZrm 4667
+VPMAXSWZrmk 4668
+VPMAXSWZrmkz 4669
+VPMAXSWZrr 4670
+VPMAXSWZrrk 4671
+VPMAXSWZrrkz 4672
+VPMAXSWrm 4673
+VPMAXSWrr 4674
+VPMAXUBYrm 4675
+VPMAXUBYrr 4676
+VPMAXUBZ 4677
+VPMAXUBZrm 4678
+VPMAXUBZrmk 4679
+VPMAXUBZrmkz 4680
+VPMAXUBZrr 4681
+VPMAXUBZrrk 4682
+VPMAXUBZrrkz 4683
+VPMAXUBrm 4684
+VPMAXUBrr 4685
+VPMAXUDYrm 4686
+VPMAXUDYrr 4687
+VPMAXUDZ 4688
+VPMAXUDZrm 4689
+VPMAXUDZrmb 4690
+VPMAXUDZrmbk 4691
+VPMAXUDZrmbkz 4692
+VPMAXUDZrmk 4693
+VPMAXUDZrmkz 4694
+VPMAXUDZrr 4695
+VPMAXUDZrrk 4696
+VPMAXUDZrrkz 4697
+VPMAXUDrm 4698
+VPMAXUDrr 4699
+VPMAXUQZ 4700
+VPMAXUQZrm 4701
+VPMAXUQZrmb 4702
+VPMAXUQZrmbk 4703
+VPMAXUQZrmbkz 4704
+VPMAXUQZrmk 4705
+VPMAXUQZrmkz 4706
+VPMAXUQZrr 4707
+VPMAXUQZrrk 4708
+VPMAXUQZrrkz 4709
+VPMAXUWYrm 4710
+VPMAXUWYrr 4711
+VPMAXUWZ 4712
+VPMAXUWZrm 4713
+VPMAXUWZrmk 4714
+VPMAXUWZrmkz 4715
+VPMAXUWZrr 4716
+VPMAXUWZrrk 4717
+VPMAXUWZrrkz 4718
+VPMAXUWrm 4719
+VPMAXUWrr 4720
+VPMINSBYrm 4721
+VPMINSBYrr 4722
+VPMINSBZ 4723
+VPMINSBZrm 4724
+VPMINSBZrmk 4725
+VPMINSBZrmkz 4726
+VPMINSBZrr 4727
+VPMINSBZrrk 4728
+VPMINSBZrrkz 4729
+VPMINSBrm 4730
+VPMINSBrr 4731
+VPMINSDYrm 4732
+VPMINSDYrr 4733
+VPMINSDZ 4734
+VPMINSDZrm 4735
+VPMINSDZrmb 4736
+VPMINSDZrmbk 4737
+VPMINSDZrmbkz 4738
+VPMINSDZrmk 4739
+VPMINSDZrmkz 4740
+VPMINSDZrr 4741
+VPMINSDZrrk 4742
+VPMINSDZrrkz 4743
+VPMINSDrm 4744
+VPMINSDrr 4745
+VPMINSQZ 4746
+VPMINSQZrm 4747
+VPMINSQZrmb 4748
+VPMINSQZrmbk 4749
+VPMINSQZrmbkz 4750
+VPMINSQZrmk 4751
+VPMINSQZrmkz 4752
+VPMINSQZrr 4753
+VPMINSQZrrk 4754
+VPMINSQZrrkz 4755
+VPMINSWYrm 4756
+VPMINSWYrr 4757
+VPMINSWZ 4758
+VPMINSWZrm 4759
+VPMINSWZrmk 4760
+VPMINSWZrmkz 4761
+VPMINSWZrr 4762
+VPMINSWZrrk 4763
+VPMINSWZrrkz 4764
+VPMINSWrm 4765
+VPMINSWrr 4766
+VPMINUBYrm 4767
+VPMINUBYrr 4768
+VPMINUBZ 4769
+VPMINUBZrm 4770
+VPMINUBZrmk 4771
+VPMINUBZrmkz 4772
+VPMINUBZrr 4773
+VPMINUBZrrk 4774
+VPMINUBZrrkz 4775
+VPMINUBrm 4776
+VPMINUBrr 4777
+VPMINUDYrm 4778
+VPMINUDYrr 4779
+VPMINUDZ 4780
+VPMINUDZrm 4781
+VPMINUDZrmb 4782
+VPMINUDZrmbk 4783
+VPMINUDZrmbkz 4784
+VPMINUDZrmk 4785
+VPMINUDZrmkz 4786
+VPMINUDZrr 4787
+VPMINUDZrrk 4788
+VPMINUDZrrkz 4789
+VPMINUDrm 4790
+VPMINUDrr 4791
+VPMINUQZ 4792
+VPMINUQZrm 4793
+VPMINUQZrmb 4794
+VPMINUQZrmbk 4795
+VPMINUQZrmbkz 4796
+VPMINUQZrmk 4797
+VPMINUQZrmkz 4798
+VPMINUQZrr 4799
+VPMINUQZrrk 4800
+VPMINUQZrrkz 4801
+VPMINUWYrm 4802
+VPMINUWYrr 4803
+VPMINUWZ 4804
+VPMINUWZrm 4805
+VPMINUWZrmk 4806
+VPMINUWZrmkz 4807
+VPMINUWZrr 4808
+VPMINUWZrrk 4809
+VPMINUWZrrkz 4810
+VPMINUWrm 4811
+VPMINUWrr 4812
+VPMOVB 4813
+VPMOVD 4814
+VPMOVDBZ 4815
+VPMOVDBZmr 4816
+VPMOVDBZmrk 4817
+VPMOVDBZrr 4818
+VPMOVDBZrrk 4819
+VPMOVDBZrrkz 4820
+VPMOVDWZ 4821
+VPMOVDWZmr 4822
+VPMOVDWZmrk 4823
+VPMOVDWZrr 4824
+VPMOVDWZrrk 4825
+VPMOVDWZrrkz 4826
+VPMOVM 4827
+VPMOVMSKBYrr 4828
+VPMOVMSKBrr 4829
+VPMOVQ 4830
+VPMOVQBZ 4831
+VPMOVQBZmr 4832
+VPMOVQBZmrk 4833
+VPMOVQBZrr 4834
+VPMOVQBZrrk 4835
+VPMOVQBZrrkz 4836
+VPMOVQDZ 4837
+VPMOVQDZmr 4838
+VPMOVQDZmrk 4839
+VPMOVQDZrr 4840
+VPMOVQDZrrk 4841
+VPMOVQDZrrkz 4842
+VPMOVQWZ 4843
+VPMOVQWZmr 4844
+VPMOVQWZmrk 4845
+VPMOVQWZrr 4846
+VPMOVQWZrrk 4847
+VPMOVQWZrrkz 4848
+VPMOVSDBZ 4849
+VPMOVSDBZmr 4850
+VPMOVSDBZmrk 4851
+VPMOVSDBZrr 4852
+VPMOVSDBZrrk 4853
+VPMOVSDBZrrkz 4854
+VPMOVSDWZ 4855
+VPMOVSDWZmr 4856
+VPMOVSDWZmrk 4857
+VPMOVSDWZrr 4858
+VPMOVSDWZrrk 4859
+VPMOVSDWZrrkz 4860
+VPMOVSQBZ 4861
+VPMOVSQBZmr 4862
+VPMOVSQBZmrk 4863
+VPMOVSQBZrr 4864
+VPMOVSQBZrrk 4865
+VPMOVSQBZrrkz 4866
+VPMOVSQDZ 4867
+VPMOVSQDZmr 4868
+VPMOVSQDZmrk 4869
+VPMOVSQDZrr 4870
+VPMOVSQDZrrk 4871
+VPMOVSQDZrrkz 4872
+VPMOVSQWZ 4873
+VPMOVSQWZmr 4874
+VPMOVSQWZmrk 4875
+VPMOVSQWZrr 4876
+VPMOVSQWZrrk 4877
+VPMOVSQWZrrkz 4878
+VPMOVSWBZ 4879
+VPMOVSWBZmr 4880
+VPMOVSWBZmrk 4881
+VPMOVSWBZrr 4882
+VPMOVSWBZrrk 4883
+VPMOVSWBZrrkz 4884
+VPMOVSXBDYrm 4885
+VPMOVSXBDYrr 4886
+VPMOVSXBDZ 4887
+VPMOVSXBDZrm 4888
+VPMOVSXBDZrmk 4889
+VPMOVSXBDZrmkz 4890
+VPMOVSXBDZrr 4891
+VPMOVSXBDZrrk 4892
+VPMOVSXBDZrrkz 4893
+VPMOVSXBDrm 4894
+VPMOVSXBDrr 4895
+VPMOVSXBQYrm 4896
+VPMOVSXBQYrr 4897
+VPMOVSXBQZ 4898
+VPMOVSXBQZrm 4899
+VPMOVSXBQZrmk 4900
+VPMOVSXBQZrmkz 4901
+VPMOVSXBQZrr 4902
+VPMOVSXBQZrrk 4903
+VPMOVSXBQZrrkz 4904
+VPMOVSXBQrm 4905
+VPMOVSXBQrr 4906
+VPMOVSXBWYrm 4907
+VPMOVSXBWYrr 4908
+VPMOVSXBWZ 4909
+VPMOVSXBWZrm 4910
+VPMOVSXBWZrmk 4911
+VPMOVSXBWZrmkz 4912
+VPMOVSXBWZrr 4913
+VPMOVSXBWZrrk 4914
+VPMOVSXBWZrrkz 4915
+VPMOVSXBWrm 4916
+VPMOVSXBWrr 4917
+VPMOVSXDQYrm 4918
+VPMOVSXDQYrr 4919
+VPMOVSXDQZ 4920
+VPMOVSXDQZrm 4921
+VPMOVSXDQZrmk 4922
+VPMOVSXDQZrmkz 4923
+VPMOVSXDQZrr 4924
+VPMOVSXDQZrrk 4925
+VPMOVSXDQZrrkz 4926
+VPMOVSXDQrm 4927
+VPMOVSXDQrr 4928
+VPMOVSXWDYrm 4929
+VPMOVSXWDYrr 4930
+VPMOVSXWDZ 4931
+VPMOVSXWDZrm 4932
+VPMOVSXWDZrmk 4933
+VPMOVSXWDZrmkz 4934
+VPMOVSXWDZrr 4935
+VPMOVSXWDZrrk 4936
+VPMOVSXWDZrrkz 4937
+VPMOVSXWDrm 4938
+VPMOVSXWDrr 4939
+VPMOVSXWQYrm 4940
+VPMOVSXWQYrr 4941
+VPMOVSXWQZ 4942
+VPMOVSXWQZrm 4943
+VPMOVSXWQZrmk 4944
+VPMOVSXWQZrmkz 4945
+VPMOVSXWQZrr 4946
+VPMOVSXWQZrrk 4947
+VPMOVSXWQZrrkz 4948
+VPMOVSXWQrm 4949
+VPMOVSXWQrr 4950
+VPMOVUSDBZ 4951
+VPMOVUSDBZmr 4952
+VPMOVUSDBZmrk 4953
+VPMOVUSDBZrr 4954
+VPMOVUSDBZrrk 4955
+VPMOVUSDBZrrkz 4956
+VPMOVUSDWZ 4957
+VPMOVUSDWZmr 4958
+VPMOVUSDWZmrk 4959
+VPMOVUSDWZrr 4960
+VPMOVUSDWZrrk 4961
+VPMOVUSDWZrrkz 4962
+VPMOVUSQBZ 4963
+VPMOVUSQBZmr 4964
+VPMOVUSQBZmrk 4965
+VPMOVUSQBZrr 4966
+VPMOVUSQBZrrk 4967
+VPMOVUSQBZrrkz 4968
+VPMOVUSQDZ 4969
+VPMOVUSQDZmr 4970
+VPMOVUSQDZmrk 4971
+VPMOVUSQDZrr 4972
+VPMOVUSQDZrrk 4973
+VPMOVUSQDZrrkz 4974
+VPMOVUSQWZ 4975
+VPMOVUSQWZmr 4976
+VPMOVUSQWZmrk 4977
+VPMOVUSQWZrr 4978
+VPMOVUSQWZrrk 4979
+VPMOVUSQWZrrkz 4980
+VPMOVUSWBZ 4981
+VPMOVUSWBZmr 4982
+VPMOVUSWBZmrk 4983
+VPMOVUSWBZrr 4984
+VPMOVUSWBZrrk 4985
+VPMOVUSWBZrrkz 4986
+VPMOVW 4987
+VPMOVWBZ 4988
+VPMOVWBZmr 4989
+VPMOVWBZmrk 4990
+VPMOVWBZrr 4991
+VPMOVWBZrrk 4992
+VPMOVWBZrrkz 4993
+VPMOVZXBDYrm 4994
+VPMOVZXBDYrr 4995
+VPMOVZXBDZ 4996
+VPMOVZXBDZrm 4997
+VPMOVZXBDZrmk 4998
+VPMOVZXBDZrmkz 4999
+VPMOVZXBDZrr 5000
+VPMOVZXBDZrrk 5001
+VPMOVZXBDZrrkz 5002
+VPMOVZXBDrm 5003
+VPMOVZXBDrr 5004
+VPMOVZXBQYrm 5005
+VPMOVZXBQYrr 5006
+VPMOVZXBQZ 5007
+VPMOVZXBQZrm 5008
+VPMOVZXBQZrmk 5009
+VPMOVZXBQZrmkz 5010
+VPMOVZXBQZrr 5011
+VPMOVZXBQZrrk 5012
+VPMOVZXBQZrrkz 5013
+VPMOVZXBQrm 5014
+VPMOVZXBQrr 5015
+VPMOVZXBWYrm 5016
+VPMOVZXBWYrr 5017
+VPMOVZXBWZ 5018
+VPMOVZXBWZrm 5019
+VPMOVZXBWZrmk 5020
+VPMOVZXBWZrmkz 5021
+VPMOVZXBWZrr 5022
+VPMOVZXBWZrrk 5023
+VPMOVZXBWZrrkz 5024
+VPMOVZXBWrm 5025
+VPMOVZXBWrr 5026
+VPMOVZXDQYrm 5027
+VPMOVZXDQYrr 5028
+VPMOVZXDQZ 5029
+VPMOVZXDQZrm 5030
+VPMOVZXDQZrmk 5031
+VPMOVZXDQZrmkz 5032
+VPMOVZXDQZrr 5033
+VPMOVZXDQZrrk 5034
+VPMOVZXDQZrrkz 5035
+VPMOVZXDQrm 5036
+VPMOVZXDQrr 5037
+VPMOVZXWDYrm 5038
+VPMOVZXWDYrr 5039
+VPMOVZXWDZ 5040
+VPMOVZXWDZrm 5041
+VPMOVZXWDZrmk 5042
+VPMOVZXWDZrmkz 5043
+VPMOVZXWDZrr 5044
+VPMOVZXWDZrrk 5045
+VPMOVZXWDZrrkz 5046
+VPMOVZXWDrm 5047
+VPMOVZXWDrr 5048
+VPMOVZXWQYrm 5049
+VPMOVZXWQYrr 5050
+VPMOVZXWQZ 5051
+VPMOVZXWQZrm 5052
+VPMOVZXWQZrmk 5053
+VPMOVZXWQZrmkz 5054
+VPMOVZXWQZrr 5055
+VPMOVZXWQZrrk 5056
+VPMOVZXWQZrrkz 5057
+VPMOVZXWQrm 5058
+VPMOVZXWQrr 5059
+VPMULDQYrm 5060
+VPMULDQYrr 5061
+VPMULDQZ 5062
+VPMULDQZrm 5063
+VPMULDQZrmb 5064
+VPMULDQZrmbk 5065
+VPMULDQZrmbkz 5066
+VPMULDQZrmk 5067
+VPMULDQZrmkz 5068
+VPMULDQZrr 5069
+VPMULDQZrrk 5070
+VPMULDQZrrkz 5071
+VPMULDQrm 5072
+VPMULDQrr 5073
+VPMULHRSWYrm 5074
+VPMULHRSWYrr 5075
+VPMULHRSWZ 5076
+VPMULHRSWZrm 5077
+VPMULHRSWZrmk 5078
+VPMULHRSWZrmkz 5079
+VPMULHRSWZrr 5080
+VPMULHRSWZrrk 5081
+VPMULHRSWZrrkz 5082
+VPMULHRSWrm 5083
+VPMULHRSWrr 5084
+VPMULHUWYrm 5085
+VPMULHUWYrr 5086
+VPMULHUWZ 5087
+VPMULHUWZrm 5088
+VPMULHUWZrmk 5089
+VPMULHUWZrmkz 5090
+VPMULHUWZrr 5091
+VPMULHUWZrrk 5092
+VPMULHUWZrrkz 5093
+VPMULHUWrm 5094
+VPMULHUWrr 5095
+VPMULHWYrm 5096
+VPMULHWYrr 5097
+VPMULHWZ 5098
+VPMULHWZrm 5099
+VPMULHWZrmk 5100
+VPMULHWZrmkz 5101
+VPMULHWZrr 5102
+VPMULHWZrrk 5103
+VPMULHWZrrkz 5104
+VPMULHWrm 5105
+VPMULHWrr 5106
+VPMULLDYrm 5107
+VPMULLDYrr 5108
+VPMULLDZ 5109
+VPMULLDZrm 5110
+VPMULLDZrmb 5111
+VPMULLDZrmbk 5112
+VPMULLDZrmbkz 5113
+VPMULLDZrmk 5114
+VPMULLDZrmkz 5115
+VPMULLDZrr 5116
+VPMULLDZrrk 5117
+VPMULLDZrrkz 5118
+VPMULLDrm 5119
+VPMULLDrr 5120
+VPMULLQZ 5121
+VPMULLQZrm 5122
+VPMULLQZrmb 5123
+VPMULLQZrmbk 5124
+VPMULLQZrmbkz 5125
+VPMULLQZrmk 5126
+VPMULLQZrmkz 5127
+VPMULLQZrr 5128
+VPMULLQZrrk 5129
+VPMULLQZrrkz 5130
+VPMULLWYrm 5131
+VPMULLWYrr 5132
+VPMULLWZ 5133
+VPMULLWZrm 5134
+VPMULLWZrmk 5135
+VPMULLWZrmkz 5136
+VPMULLWZrr 5137
+VPMULLWZrrk 5138
+VPMULLWZrrkz 5139
+VPMULLWrm 5140
+VPMULLWrr 5141
+VPMULTISHIFTQBZ 5142
+VPMULTISHIFTQBZrm 5143
+VPMULTISHIFTQBZrmb 5144
+VPMULTISHIFTQBZrmbk 5145
+VPMULTISHIFTQBZrmbkz 5146
+VPMULTISHIFTQBZrmk 5147
+VPMULTISHIFTQBZrmkz 5148
+VPMULTISHIFTQBZrr 5149
+VPMULTISHIFTQBZrrk 5150
+VPMULTISHIFTQBZrrkz 5151
+VPMULUDQYrm 5152
+VPMULUDQYrr 5153
+VPMULUDQZ 5154
+VPMULUDQZrm 5155
+VPMULUDQZrmb 5156
+VPMULUDQZrmbk 5157
+VPMULUDQZrmbkz 5158
+VPMULUDQZrmk 5159
+VPMULUDQZrmkz 5160
+VPMULUDQZrr 5161
+VPMULUDQZrrk 5162
+VPMULUDQZrrkz 5163
+VPMULUDQrm 5164
+VPMULUDQrr 5165
+VPOPCNTBZ 5166
+VPOPCNTBZrm 5167
+VPOPCNTBZrmk 5168
+VPOPCNTBZrmkz 5169
+VPOPCNTBZrr 5170
+VPOPCNTBZrrk 5171
+VPOPCNTBZrrkz 5172
+VPOPCNTDZ 5173
+VPOPCNTDZrm 5174
+VPOPCNTDZrmb 5175
+VPOPCNTDZrmbk 5176
+VPOPCNTDZrmbkz 5177
+VPOPCNTDZrmk 5178
+VPOPCNTDZrmkz 5179
+VPOPCNTDZrr 5180
+VPOPCNTDZrrk 5181
+VPOPCNTDZrrkz 5182
+VPOPCNTQZ 5183
+VPOPCNTQZrm 5184
+VPOPCNTQZrmb 5185
+VPOPCNTQZrmbk 5186
+VPOPCNTQZrmbkz 5187
+VPOPCNTQZrmk 5188
+VPOPCNTQZrmkz 5189
+VPOPCNTQZrr 5190
+VPOPCNTQZrrk 5191
+VPOPCNTQZrrkz 5192
+VPOPCNTWZ 5193
+VPOPCNTWZrm 5194
+VPOPCNTWZrmk 5195
+VPOPCNTWZrmkz 5196
+VPOPCNTWZrr 5197
+VPOPCNTWZrrk 5198
+VPOPCNTWZrrkz 5199
+VPORDZ 5200
+VPORDZrm 5201
+VPORDZrmb 5202
+VPORDZrmbk 5203
+VPORDZrmbkz 5204
+VPORDZrmk 5205
+VPORDZrmkz 5206
+VPORDZrr 5207
+VPORDZrrk 5208
+VPORDZrrkz 5209
+VPORQZ 5210
+VPORQZrm 5211
+VPORQZrmb 5212
+VPORQZrmbk 5213
+VPORQZrmbkz 5214
+VPORQZrmk 5215
+VPORQZrmkz 5216
+VPORQZrr 5217
+VPORQZrrk 5218
+VPORQZrrkz 5219
+VPORYrm 5220
+VPORYrr 5221
+VPORrm 5222
+VPORrr 5223
+VPPERMrmr 5224
+VPPERMrrm 5225
+VPPERMrrr 5226
+VPPERMrrr_REV 5227
+VPROLDZ 5228
+VPROLDZmbi 5229
+VPROLDZmbik 5230
+VPROLDZmbikz 5231
+VPROLDZmi 5232
+VPROLDZmik 5233
+VPROLDZmikz 5234
+VPROLDZri 5235
+VPROLDZrik 5236
+VPROLDZrikz 5237
+VPROLQZ 5238
+VPROLQZmbi 5239
+VPROLQZmbik 5240
+VPROLQZmbikz 5241
+VPROLQZmi 5242
+VPROLQZmik 5243
+VPROLQZmikz 5244
+VPROLQZri 5245
+VPROLQZrik 5246
+VPROLQZrikz 5247
+VPROLVDZ 5248
+VPROLVDZrm 5249
+VPROLVDZrmb 5250
+VPROLVDZrmbk 5251
+VPROLVDZrmbkz 5252
+VPROLVDZrmk 5253
+VPROLVDZrmkz 5254
+VPROLVDZrr 5255
+VPROLVDZrrk 5256
+VPROLVDZrrkz 5257
+VPROLVQZ 5258
+VPROLVQZrm 5259
+VPROLVQZrmb 5260
+VPROLVQZrmbk 5261
+VPROLVQZrmbkz 5262
+VPROLVQZrmk 5263
+VPROLVQZrmkz 5264
+VPROLVQZrr 5265
+VPROLVQZrrk 5266
+VPROLVQZrrkz 5267
+VPRORDZ 5268
+VPRORDZmbi 5269
+VPRORDZmbik 5270
+VPRORDZmbikz 5271
+VPRORDZmi 5272
+VPRORDZmik 5273
+VPRORDZmikz 5274
+VPRORDZri 5275
+VPRORDZrik 5276
+VPRORDZrikz 5277
+VPRORQZ 5278
+VPRORQZmbi 5279
+VPRORQZmbik 5280
+VPRORQZmbikz 5281
+VPRORQZmi 5282
+VPRORQZmik 5283
+VPRORQZmikz 5284
+VPRORQZri 5285
+VPRORQZrik 5286
+VPRORQZrikz 5287
+VPRORVDZ 5288
+VPRORVDZrm 5289
+VPRORVDZrmb 5290
+VPRORVDZrmbk 5291
+VPRORVDZrmbkz 5292
+VPRORVDZrmk 5293
+VPRORVDZrmkz 5294
+VPRORVDZrr 5295
+VPRORVDZrrk 5296
+VPRORVDZrrkz 5297
+VPRORVQZ 5298
+VPRORVQZrm 5299
+VPRORVQZrmb 5300
+VPRORVQZrmbk 5301
+VPRORVQZrmbkz 5302
+VPRORVQZrmk 5303
+VPRORVQZrmkz 5304
+VPRORVQZrr 5305
+VPRORVQZrrk 5306
+VPRORVQZrrkz 5307
+VPROTBmi 5308
+VPROTBmr 5309
+VPROTBri 5310
+VPROTBrm 5311
+VPROTBrr 5312
+VPROTBrr_REV 5313
+VPROTDmi 5314
+VPROTDmr 5315
+VPROTDri 5316
+VPROTDrm 5317
+VPROTDrr 5318
+VPROTDrr_REV 5319
+VPROTQmi 5320
+VPROTQmr 5321
+VPROTQri 5322
+VPROTQrm 5323
+VPROTQrr 5324
+VPROTQrr_REV 5325
+VPROTWmi 5326
+VPROTWmr 5327
+VPROTWri 5328
+VPROTWrm 5329
+VPROTWrr 5330
+VPROTWrr_REV 5331
+VPSADBWYrm 5332
+VPSADBWYrr 5333
+VPSADBWZ 5334
+VPSADBWZrm 5335
+VPSADBWZrr 5336
+VPSADBWrm 5337
+VPSADBWrr 5338
+VPSCATTERDDZ 5339
+VPSCATTERDDZmr 5340
+VPSCATTERDQZ 5341
+VPSCATTERDQZmr 5342
+VPSCATTERQDZ 5343
+VPSCATTERQDZmr 5344
+VPSCATTERQQZ 5345
+VPSCATTERQQZmr 5346
+VPSHABmr 5347
+VPSHABrm 5348
+VPSHABrr 5349
+VPSHABrr_REV 5350
+VPSHADmr 5351
+VPSHADrm 5352
+VPSHADrr 5353
+VPSHADrr_REV 5354
+VPSHAQmr 5355
+VPSHAQrm 5356
+VPSHAQrr 5357
+VPSHAQrr_REV 5358
+VPSHAWmr 5359
+VPSHAWrm 5360
+VPSHAWrr 5361
+VPSHAWrr_REV 5362
+VPSHLBmr 5363
+VPSHLBrm 5364
+VPSHLBrr 5365
+VPSHLBrr_REV 5366
+VPSHLDDZ 5367
+VPSHLDDZrmbi 5368
+VPSHLDDZrmbik 5369
+VPSHLDDZrmbikz 5370
+VPSHLDDZrmi 5371
+VPSHLDDZrmik 5372
+VPSHLDDZrmikz 5373
+VPSHLDDZrri 5374
+VPSHLDDZrrik 5375
+VPSHLDDZrrikz 5376
+VPSHLDQZ 5377
+VPSHLDQZrmbi 5378
+VPSHLDQZrmbik 5379
+VPSHLDQZrmbikz 5380
+VPSHLDQZrmi 5381
+VPSHLDQZrmik 5382
+VPSHLDQZrmikz 5383
+VPSHLDQZrri 5384
+VPSHLDQZrrik 5385
+VPSHLDQZrrikz 5386
+VPSHLDVDZ 5387
+VPSHLDVDZm 5388
+VPSHLDVDZmb 5389
+VPSHLDVDZmbk 5390
+VPSHLDVDZmbkz 5391
+VPSHLDVDZmk 5392
+VPSHLDVDZmkz 5393
+VPSHLDVDZr 5394
+VPSHLDVDZrk 5395
+VPSHLDVDZrkz 5396
+VPSHLDVQZ 5397
+VPSHLDVQZm 5398
+VPSHLDVQZmb 5399
+VPSHLDVQZmbk 5400
+VPSHLDVQZmbkz 5401
+VPSHLDVQZmk 5402
+VPSHLDVQZmkz 5403
+VPSHLDVQZr 5404
+VPSHLDVQZrk 5405
+VPSHLDVQZrkz 5406
+VPSHLDVWZ 5407
+VPSHLDVWZm 5408
+VPSHLDVWZmk 5409
+VPSHLDVWZmkz 5410
+VPSHLDVWZr 5411
+VPSHLDVWZrk 5412
+VPSHLDVWZrkz 5413
+VPSHLDWZ 5414
+VPSHLDWZrmi 5415
+VPSHLDWZrmik 5416
+VPSHLDWZrmikz 5417
+VPSHLDWZrri 5418
+VPSHLDWZrrik 5419
+VPSHLDWZrrikz 5420
+VPSHLDmr 5421
+VPSHLDrm 5422
+VPSHLDrr 5423
+VPSHLDrr_REV 5424
+VPSHLQmr 5425
+VPSHLQrm 5426
+VPSHLQrr 5427
+VPSHLQrr_REV 5428
+VPSHLWmr 5429
+VPSHLWrm 5430
+VPSHLWrr 5431
+VPSHLWrr_REV 5432
+VPSHRDDZ 5433
+VPSHRDDZrmbi 5434
+VPSHRDDZrmbik 5435
+VPSHRDDZrmbikz 5436
+VPSHRDDZrmi 5437
+VPSHRDDZrmik 5438
+VPSHRDDZrmikz 5439
+VPSHRDDZrri 5440
+VPSHRDDZrrik 5441
+VPSHRDDZrrikz 5442
+VPSHRDQZ 5443
+VPSHRDQZrmbi 5444
+VPSHRDQZrmbik 5445
+VPSHRDQZrmbikz 5446
+VPSHRDQZrmi 5447
+VPSHRDQZrmik 5448
+VPSHRDQZrmikz 5449
+VPSHRDQZrri 5450
+VPSHRDQZrrik 5451
+VPSHRDQZrrikz 5452
+VPSHRDVDZ 5453
+VPSHRDVDZm 5454
+VPSHRDVDZmb 5455
+VPSHRDVDZmbk 5456
+VPSHRDVDZmbkz 5457
+VPSHRDVDZmk 5458
+VPSHRDVDZmkz 5459
+VPSHRDVDZr 5460
+VPSHRDVDZrk 5461
+VPSHRDVDZrkz 5462
+VPSHRDVQZ 5463
+VPSHRDVQZm 5464
+VPSHRDVQZmb 5465
+VPSHRDVQZmbk 5466
+VPSHRDVQZmbkz 5467
+VPSHRDVQZmk 5468
+VPSHRDVQZmkz 5469
+VPSHRDVQZr 5470
+VPSHRDVQZrk 5471
+VPSHRDVQZrkz 5472
+VPSHRDVWZ 5473
+VPSHRDVWZm 5474
+VPSHRDVWZmk 5475
+VPSHRDVWZmkz 5476
+VPSHRDVWZr 5477
+VPSHRDVWZrk 5478
+VPSHRDVWZrkz 5479
+VPSHRDWZ 5480
+VPSHRDWZrmi 5481
+VPSHRDWZrmik 5482
+VPSHRDWZrmikz 5483
+VPSHRDWZrri 5484
+VPSHRDWZrrik 5485
+VPSHRDWZrrikz 5486
+VPSHUFBITQMBZ 5487
+VPSHUFBITQMBZrm 5488
+VPSHUFBITQMBZrmk 5489
+VPSHUFBITQMBZrr 5490
+VPSHUFBITQMBZrrk 5491
+VPSHUFBYrm 5492
+VPSHUFBYrr 5493
+VPSHUFBZ 5494
+VPSHUFBZrm 5495
+VPSHUFBZrmk 5496
+VPSHUFBZrmkz 5497
+VPSHUFBZrr 5498
+VPSHUFBZrrk 5499
+VPSHUFBZrrkz 5500
+VPSHUFBrm 5501
+VPSHUFBrr 5502
+VPSHUFDYmi 5503
+VPSHUFDYri 5504
+VPSHUFDZ 5505
+VPSHUFDZmbi 5506
+VPSHUFDZmbik 5507
+VPSHUFDZmbikz 5508
+VPSHUFDZmi 5509
+VPSHUFDZmik 5510
+VPSHUFDZmikz 5511
+VPSHUFDZri 5512
+VPSHUFDZrik 5513
+VPSHUFDZrikz 5514
+VPSHUFDmi 5515
+VPSHUFDri 5516
+VPSHUFHWYmi 5517
+VPSHUFHWYri 5518
+VPSHUFHWZ 5519
+VPSHUFHWZmi 5520
+VPSHUFHWZmik 5521
+VPSHUFHWZmikz 5522
+VPSHUFHWZri 5523
+VPSHUFHWZrik 5524
+VPSHUFHWZrikz 5525
+VPSHUFHWmi 5526
+VPSHUFHWri 5527
+VPSHUFLWYmi 5528
+VPSHUFLWYri 5529
+VPSHUFLWZ 5530
+VPSHUFLWZmi 5531
+VPSHUFLWZmik 5532
+VPSHUFLWZmikz 5533
+VPSHUFLWZri 5534
+VPSHUFLWZrik 5535
+VPSHUFLWZrikz 5536
+VPSHUFLWmi 5537
+VPSHUFLWri 5538
+VPSIGNBYrm 5539
+VPSIGNBYrr 5540
+VPSIGNBrm 5541
+VPSIGNBrr 5542
+VPSIGNDYrm 5543
+VPSIGNDYrr 5544
+VPSIGNDrm 5545
+VPSIGNDrr 5546
+VPSIGNWYrm 5547
+VPSIGNWYrr 5548
+VPSIGNWrm 5549
+VPSIGNWrr 5550
+VPSLLDQYri 5551
+VPSLLDQZ 5552
+VPSLLDQZmi 5553
+VPSLLDQZri 5554
+VPSLLDQri 5555
+VPSLLDYri 5556
+VPSLLDYrm 5557
+VPSLLDYrr 5558
+VPSLLDZ 5559
+VPSLLDZmbi 5560
+VPSLLDZmbik 5561
+VPSLLDZmbikz 5562
+VPSLLDZmi 5563
+VPSLLDZmik 5564
+VPSLLDZmikz 5565
+VPSLLDZri 5566
+VPSLLDZrik 5567
+VPSLLDZrikz 5568
+VPSLLDZrm 5569
+VPSLLDZrmk 5570
+VPSLLDZrmkz 5571
+VPSLLDZrr 5572
+VPSLLDZrrk 5573
+VPSLLDZrrkz 5574
+VPSLLDri 5575
+VPSLLDrm 5576
+VPSLLDrr 5577
+VPSLLQYri 5578
+VPSLLQYrm 5579
+VPSLLQYrr 5580
+VPSLLQZ 5581
+VPSLLQZmbi 5582
+VPSLLQZmbik 5583
+VPSLLQZmbikz 5584
+VPSLLQZmi 5585
+VPSLLQZmik 5586
+VPSLLQZmikz 5587
+VPSLLQZri 5588
+VPSLLQZrik 5589
+VPSLLQZrikz 5590
+VPSLLQZrm 5591
+VPSLLQZrmk 5592
+VPSLLQZrmkz 5593
+VPSLLQZrr 5594
+VPSLLQZrrk 5595
+VPSLLQZrrkz 5596
+VPSLLQri 5597
+VPSLLQrm 5598
+VPSLLQrr 5599
+VPSLLVDYrm 5600
+VPSLLVDYrr 5601
+VPSLLVDZ 5602
+VPSLLVDZrm 5603
+VPSLLVDZrmb 5604
+VPSLLVDZrmbk 5605
+VPSLLVDZrmbkz 5606
+VPSLLVDZrmk 5607
+VPSLLVDZrmkz 5608
+VPSLLVDZrr 5609
+VPSLLVDZrrk 5610
+VPSLLVDZrrkz 5611
+VPSLLVDrm 5612
+VPSLLVDrr 5613
+VPSLLVQYrm 5614
+VPSLLVQYrr 5615
+VPSLLVQZ 5616
+VPSLLVQZrm 5617
+VPSLLVQZrmb 5618
+VPSLLVQZrmbk 5619
+VPSLLVQZrmbkz 5620
+VPSLLVQZrmk 5621
+VPSLLVQZrmkz 5622
+VPSLLVQZrr 5623
+VPSLLVQZrrk 5624
+VPSLLVQZrrkz 5625
+VPSLLVQrm 5626
+VPSLLVQrr 5627
+VPSLLVWZ 5628
+VPSLLVWZrm 5629
+VPSLLVWZrmk 5630
+VPSLLVWZrmkz 5631
+VPSLLVWZrr 5632
+VPSLLVWZrrk 5633
+VPSLLVWZrrkz 5634
+VPSLLWYri 5635
+VPSLLWYrm 5636
+VPSLLWYrr 5637
+VPSLLWZ 5638
+VPSLLWZmi 5639
+VPSLLWZmik 5640
+VPSLLWZmikz 5641
+VPSLLWZri 5642
+VPSLLWZrik 5643
+VPSLLWZrikz 5644
+VPSLLWZrm 5645
+VPSLLWZrmk 5646
+VPSLLWZrmkz 5647
+VPSLLWZrr 5648
+VPSLLWZrrk 5649
+VPSLLWZrrkz 5650
+VPSLLWri 5651
+VPSLLWrm 5652
+VPSLLWrr 5653
+VPSRADYri 5654
+VPSRADYrm 5655
+VPSRADYrr 5656
+VPSRADZ 5657
+VPSRADZmbi 5658
+VPSRADZmbik 5659
+VPSRADZmbikz 5660
+VPSRADZmi 5661
+VPSRADZmik 5662
+VPSRADZmikz 5663
+VPSRADZri 5664
+VPSRADZrik 5665
+VPSRADZrikz 5666
+VPSRADZrm 5667
+VPSRADZrmk 5668
+VPSRADZrmkz 5669
+VPSRADZrr 5670
+VPSRADZrrk 5671
+VPSRADZrrkz 5672
+VPSRADri 5673
+VPSRADrm 5674
+VPSRADrr 5675
+VPSRAQZ 5676
+VPSRAQZmbi 5677
+VPSRAQZmbik 5678
+VPSRAQZmbikz 5679
+VPSRAQZmi 5680
+VPSRAQZmik 5681
+VPSRAQZmikz 5682
+VPSRAQZri 5683
+VPSRAQZrik 5684
+VPSRAQZrikz 5685
+VPSRAQZrm 5686
+VPSRAQZrmk 5687
+VPSRAQZrmkz 5688
+VPSRAQZrr 5689
+VPSRAQZrrk 5690
+VPSRAQZrrkz 5691
+VPSRAVDYrm 5692
+VPSRAVDYrr 5693
+VPSRAVDZ 5694
+VPSRAVDZrm 5695
+VPSRAVDZrmb 5696
+VPSRAVDZrmbk 5697
+VPSRAVDZrmbkz 5698
+VPSRAVDZrmk 5699
+VPSRAVDZrmkz 5700
+VPSRAVDZrr 5701
+VPSRAVDZrrk 5702
+VPSRAVDZrrkz 5703
+VPSRAVDrm 5704
+VPSRAVDrr 5705
+VPSRAVQZ 5706
+VPSRAVQZrm 5707
+VPSRAVQZrmb 5708
+VPSRAVQZrmbk 5709
+VPSRAVQZrmbkz 5710
+VPSRAVQZrmk 5711
+VPSRAVQZrmkz 5712
+VPSRAVQZrr 5713
+VPSRAVQZrrk 5714
+VPSRAVQZrrkz 5715
+VPSRAVWZ 5716
+VPSRAVWZrm 5717
+VPSRAVWZrmk 5718
+VPSRAVWZrmkz 5719
+VPSRAVWZrr 5720
+VPSRAVWZrrk 5721
+VPSRAVWZrrkz 5722
+VPSRAWYri 5723
+VPSRAWYrm 5724
+VPSRAWYrr 5725
+VPSRAWZ 5726
+VPSRAWZmi 5727
+VPSRAWZmik 5728
+VPSRAWZmikz 5729
+VPSRAWZri 5730
+VPSRAWZrik 5731
+VPSRAWZrikz 5732
+VPSRAWZrm 5733
+VPSRAWZrmk 5734
+VPSRAWZrmkz 5735
+VPSRAWZrr 5736
+VPSRAWZrrk 5737
+VPSRAWZrrkz 5738
+VPSRAWri 5739
+VPSRAWrm 5740
+VPSRAWrr 5741
+VPSRLDQYri 5742
+VPSRLDQZ 5743
+VPSRLDQZmi 5744
+VPSRLDQZri 5745
+VPSRLDQri 5746
+VPSRLDYri 5747
+VPSRLDYrm 5748
+VPSRLDYrr 5749
+VPSRLDZ 5750
+VPSRLDZmbi 5751
+VPSRLDZmbik 5752
+VPSRLDZmbikz 5753
+VPSRLDZmi 5754
+VPSRLDZmik 5755
+VPSRLDZmikz 5756
+VPSRLDZri 5757
+VPSRLDZrik 5758
+VPSRLDZrikz 5759
+VPSRLDZrm 5760
+VPSRLDZrmk 5761
+VPSRLDZrmkz 5762
+VPSRLDZrr 5763
+VPSRLDZrrk 5764
+VPSRLDZrrkz 5765
+VPSRLDri 5766
+VPSRLDrm 5767
+VPSRLDrr 5768
+VPSRLQYri 5769
+VPSRLQYrm 5770
+VPSRLQYrr 5771
+VPSRLQZ 5772
+VPSRLQZmbi 5773
+VPSRLQZmbik 5774
+VPSRLQZmbikz 5775
+VPSRLQZmi 5776
+VPSRLQZmik 5777
+VPSRLQZmikz 5778
+VPSRLQZri 5779
+VPSRLQZrik 5780
+VPSRLQZrikz 5781
+VPSRLQZrm 5782
+VPSRLQZrmk 5783
+VPSRLQZrmkz 5784
+VPSRLQZrr 5785
+VPSRLQZrrk 5786
+VPSRLQZrrkz 5787
+VPSRLQri 5788
+VPSRLQrm 5789
+VPSRLQrr 5790
+VPSRLVDYrm 5791
+VPSRLVDYrr 5792
+VPSRLVDZ 5793
+VPSRLVDZrm 5794
+VPSRLVDZrmb 5795
+VPSRLVDZrmbk 5796
+VPSRLVDZrmbkz 5797
+VPSRLVDZrmk 5798
+VPSRLVDZrmkz 5799
+VPSRLVDZrr 5800
+VPSRLVDZrrk 5801
+VPSRLVDZrrkz 5802
+VPSRLVDrm 5803
+VPSRLVDrr 5804
+VPSRLVQYrm 5805
+VPSRLVQYrr 5806
+VPSRLVQZ 5807
+VPSRLVQZrm 5808
+VPSRLVQZrmb 5809
+VPSRLVQZrmbk 5810
+VPSRLVQZrmbkz 5811
+VPSRLVQZrmk 5812
+VPSRLVQZrmkz 5813
+VPSRLVQZrr 5814
+VPSRLVQZrrk 5815
+VPSRLVQZrrkz 5816
+VPSRLVQrm 5817
+VPSRLVQrr 5818
+VPSRLVWZ 5819
+VPSRLVWZrm 5820
+VPSRLVWZrmk 5821
+VPSRLVWZrmkz 5822
+VPSRLVWZrr 5823
+VPSRLVWZrrk 5824
+VPSRLVWZrrkz 5825
+VPSRLWYri 5826
+VPSRLWYrm 5827
+VPSRLWYrr 5828
+VPSRLWZ 5829
+VPSRLWZmi 5830
+VPSRLWZmik 5831
+VPSRLWZmikz 5832
+VPSRLWZri 5833
+VPSRLWZrik 5834
+VPSRLWZrikz 5835
+VPSRLWZrm 5836
+VPSRLWZrmk 5837
+VPSRLWZrmkz 5838
+VPSRLWZrr 5839
+VPSRLWZrrk 5840
+VPSRLWZrrkz 5841
+VPSRLWri 5842
+VPSRLWrm 5843
+VPSRLWrr 5844
+VPSUBBYrm 5845
+VPSUBBYrr 5846
+VPSUBBZ 5847
+VPSUBBZrm 5848
+VPSUBBZrmk 5849
+VPSUBBZrmkz 5850
+VPSUBBZrr 5851
+VPSUBBZrrk 5852
+VPSUBBZrrkz 5853
+VPSUBBrm 5854
+VPSUBBrr 5855
+VPSUBDYrm 5856
+VPSUBDYrr 5857
+VPSUBDZ 5858
+VPSUBDZrm 5859
+VPSUBDZrmb 5860
+VPSUBDZrmbk 5861
+VPSUBDZrmbkz 5862
+VPSUBDZrmk 5863
+VPSUBDZrmkz 5864
+VPSUBDZrr 5865
+VPSUBDZrrk 5866
+VPSUBDZrrkz 5867
+VPSUBDrm 5868
+VPSUBDrr 5869
+VPSUBQYrm 5870
+VPSUBQYrr 5871
+VPSUBQZ 5872
+VPSUBQZrm 5873
+VPSUBQZrmb 5874
+VPSUBQZrmbk 5875
+VPSUBQZrmbkz 5876
+VPSUBQZrmk 5877
+VPSUBQZrmkz 5878
+VPSUBQZrr 5879
+VPSUBQZrrk 5880
+VPSUBQZrrkz 5881
+VPSUBQrm 5882
+VPSUBQrr 5883
+VPSUBSBYrm 5884
+VPSUBSBYrr 5885
+VPSUBSBZ 5886
+VPSUBSBZrm 5887
+VPSUBSBZrmk 5888
+VPSUBSBZrmkz 5889
+VPSUBSBZrr 5890
+VPSUBSBZrrk 5891
+VPSUBSBZrrkz 5892
+VPSUBSBrm 5893
+VPSUBSBrr 5894
+VPSUBSWYrm 5895
+VPSUBSWYrr 5896
+VPSUBSWZ 5897
+VPSUBSWZrm 5898
+VPSUBSWZrmk 5899
+VPSUBSWZrmkz 5900
+VPSUBSWZrr 5901
+VPSUBSWZrrk 5902
+VPSUBSWZrrkz 5903
+VPSUBSWrm 5904
+VPSUBSWrr 5905
+VPSUBUSBYrm 5906
+VPSUBUSBYrr 5907
+VPSUBUSBZ 5908
+VPSUBUSBZrm 5909
+VPSUBUSBZrmk 5910
+VPSUBUSBZrmkz 5911
+VPSUBUSBZrr 5912
+VPSUBUSBZrrk 5913
+VPSUBUSBZrrkz 5914
+VPSUBUSBrm 5915
+VPSUBUSBrr 5916
+VPSUBUSWYrm 5917
+VPSUBUSWYrr 5918
+VPSUBUSWZ 5919
+VPSUBUSWZrm 5920
+VPSUBUSWZrmk 5921
+VPSUBUSWZrmkz 5922
+VPSUBUSWZrr 5923
+VPSUBUSWZrrk 5924
+VPSUBUSWZrrkz 5925
+VPSUBUSWrm 5926
+VPSUBUSWrr 5927
+VPSUBWYrm 5928
+VPSUBWYrr 5929
+VPSUBWZ 5930
+VPSUBWZrm 5931
+VPSUBWZrmk 5932
+VPSUBWZrmkz 5933
+VPSUBWZrr 5934
+VPSUBWZrrk 5935
+VPSUBWZrrkz 5936
+VPSUBWrm 5937
+VPSUBWrr 5938
+VPTERNLOGDZ 5939
+VPTERNLOGDZrmbi 5940
+VPTERNLOGDZrmbik 5941
+VPTERNLOGDZrmbikz 5942
+VPTERNLOGDZrmi 5943
+VPTERNLOGDZrmik 5944
+VPTERNLOGDZrmikz 5945
+VPTERNLOGDZrri 5946
+VPTERNLOGDZrrik 5947
+VPTERNLOGDZrrikz 5948
+VPTERNLOGQZ 5949
+VPTERNLOGQZrmbi 5950
+VPTERNLOGQZrmbik 5951
+VPTERNLOGQZrmbikz 5952
+VPTERNLOGQZrmi 5953
+VPTERNLOGQZrmik 5954
+VPTERNLOGQZrmikz 5955
+VPTERNLOGQZrri 5956
+VPTERNLOGQZrrik 5957
+VPTERNLOGQZrrikz 5958
+VPTESTMBZ 5959
+VPTESTMBZrm 5960
+VPTESTMBZrmk 5961
+VPTESTMBZrr 5962
+VPTESTMBZrrk 5963
+VPTESTMDZ 5964
+VPTESTMDZrm 5965
+VPTESTMDZrmb 5966
+VPTESTMDZrmbk 5967
+VPTESTMDZrmk 5968
+VPTESTMDZrr 5969
+VPTESTMDZrrk 5970
+VPTESTMQZ 5971
+VPTESTMQZrm 5972
+VPTESTMQZrmb 5973
+VPTESTMQZrmbk 5974
+VPTESTMQZrmk 5975
+VPTESTMQZrr 5976
+VPTESTMQZrrk 5977
+VPTESTMWZ 5978
+VPTESTMWZrm 5979
+VPTESTMWZrmk 5980
+VPTESTMWZrr 5981
+VPTESTMWZrrk 5982
+VPTESTNMBZ 5983
+VPTESTNMBZrm 5984
+VPTESTNMBZrmk 5985
+VPTESTNMBZrr 5986
+VPTESTNMBZrrk 5987
+VPTESTNMDZ 5988
+VPTESTNMDZrm 5989
+VPTESTNMDZrmb 5990
+VPTESTNMDZrmbk 5991
+VPTESTNMDZrmk 5992
+VPTESTNMDZrr 5993
+VPTESTNMDZrrk 5994
+VPTESTNMQZ 5995
+VPTESTNMQZrm 5996
+VPTESTNMQZrmb 5997
+VPTESTNMQZrmbk 5998
+VPTESTNMQZrmk 5999
+VPTESTNMQZrr 6000
+VPTESTNMQZrrk 6001
+VPTESTNMWZ 6002
+VPTESTNMWZrm 6003
+VPTESTNMWZrmk 6004
+VPTESTNMWZrr 6005
+VPTESTNMWZrrk 6006
+VPTESTYrm 6007
+VPTESTYrr 6008
+VPTESTrm 6009
+VPTESTrr 6010
+VPUNPCKHBWYrm 6011
+VPUNPCKHBWYrr 6012
+VPUNPCKHBWZ 6013
+VPUNPCKHBWZrm 6014
+VPUNPCKHBWZrmk 6015
+VPUNPCKHBWZrmkz 6016
+VPUNPCKHBWZrr 6017
+VPUNPCKHBWZrrk 6018
+VPUNPCKHBWZrrkz 6019
+VPUNPCKHBWrm 6020
+VPUNPCKHBWrr 6021
+VPUNPCKHDQYrm 6022
+VPUNPCKHDQYrr 6023
+VPUNPCKHDQZ 6024
+VPUNPCKHDQZrm 6025
+VPUNPCKHDQZrmb 6026
+VPUNPCKHDQZrmbk 6027
+VPUNPCKHDQZrmbkz 6028
+VPUNPCKHDQZrmk 6029
+VPUNPCKHDQZrmkz 6030
+VPUNPCKHDQZrr 6031
+VPUNPCKHDQZrrk 6032
+VPUNPCKHDQZrrkz 6033
+VPUNPCKHDQrm 6034
+VPUNPCKHDQrr 6035
+VPUNPCKHQDQYrm 6036
+VPUNPCKHQDQYrr 6037
+VPUNPCKHQDQZ 6038
+VPUNPCKHQDQZrm 6039
+VPUNPCKHQDQZrmb 6040
+VPUNPCKHQDQZrmbk 6041
+VPUNPCKHQDQZrmbkz 6042
+VPUNPCKHQDQZrmk 6043
+VPUNPCKHQDQZrmkz 6044
+VPUNPCKHQDQZrr 6045
+VPUNPCKHQDQZrrk 6046
+VPUNPCKHQDQZrrkz 6047
+VPUNPCKHQDQrm 6048
+VPUNPCKHQDQrr 6049
+VPUNPCKHWDYrm 6050
+VPUNPCKHWDYrr 6051
+VPUNPCKHWDZ 6052
+VPUNPCKHWDZrm 6053
+VPUNPCKHWDZrmk 6054
+VPUNPCKHWDZrmkz 6055
+VPUNPCKHWDZrr 6056
+VPUNPCKHWDZrrk 6057
+VPUNPCKHWDZrrkz 6058
+VPUNPCKHWDrm 6059
+VPUNPCKHWDrr 6060
+VPUNPCKLBWYrm 6061
+VPUNPCKLBWYrr 6062
+VPUNPCKLBWZ 6063
+VPUNPCKLBWZrm 6064
+VPUNPCKLBWZrmk 6065
+VPUNPCKLBWZrmkz 6066
+VPUNPCKLBWZrr 6067
+VPUNPCKLBWZrrk 6068
+VPUNPCKLBWZrrkz 6069
+VPUNPCKLBWrm 6070
+VPUNPCKLBWrr 6071
+VPUNPCKLDQYrm 6072
+VPUNPCKLDQYrr 6073
+VPUNPCKLDQZ 6074
+VPUNPCKLDQZrm 6075
+VPUNPCKLDQZrmb 6076
+VPUNPCKLDQZrmbk 6077
+VPUNPCKLDQZrmbkz 6078
+VPUNPCKLDQZrmk 6079
+VPUNPCKLDQZrmkz 6080
+VPUNPCKLDQZrr 6081
+VPUNPCKLDQZrrk 6082
+VPUNPCKLDQZrrkz 6083
+VPUNPCKLDQrm 6084
+VPUNPCKLDQrr 6085
+VPUNPCKLQDQYrm 6086
+VPUNPCKLQDQYrr 6087
+VPUNPCKLQDQZ 6088
+VPUNPCKLQDQZrm 6089
+VPUNPCKLQDQZrmb 6090
+VPUNPCKLQDQZrmbk 6091
+VPUNPCKLQDQZrmbkz 6092
+VPUNPCKLQDQZrmk 6093
+VPUNPCKLQDQZrmkz 6094
+VPUNPCKLQDQZrr 6095
+VPUNPCKLQDQZrrk 6096
+VPUNPCKLQDQZrrkz 6097
+VPUNPCKLQDQrm 6098
+VPUNPCKLQDQrr 6099
+VPUNPCKLWDYrm 6100
+VPUNPCKLWDYrr 6101
+VPUNPCKLWDZ 6102
+VPUNPCKLWDZrm 6103
+VPUNPCKLWDZrmk 6104
+VPUNPCKLWDZrmkz 6105
+VPUNPCKLWDZrr 6106
+VPUNPCKLWDZrrk 6107
+VPUNPCKLWDZrrkz 6108
+VPUNPCKLWDrm 6109
+VPUNPCKLWDrr 6110
+VPXORDZ 6111
+VPXORDZrm 6112
+VPXORDZrmb 6113
+VPXORDZrmbk 6114
+VPXORDZrmbkz 6115
+VPXORDZrmk 6116
+VPXORDZrmkz 6117
+VPXORDZrr 6118
+VPXORDZrrk 6119
+VPXORDZrrkz 6120
+VPXORQZ 6121
+VPXORQZrm 6122
+VPXORQZrmb 6123
+VPXORQZrmbk 6124
+VPXORQZrmbkz 6125
+VPXORQZrmk 6126
+VPXORQZrmkz 6127
+VPXORQZrr 6128
+VPXORQZrrk 6129
+VPXORQZrrkz 6130
+VPXORYrm 6131
+VPXORYrr 6132
+VPXORrm 6133
+VPXORrr 6134
+VRANGEPDZ 6135
+VRANGEPDZrmbi 6136
+VRANGEPDZrmbik 6137
+VRANGEPDZrmbikz 6138
+VRANGEPDZrmi 6139
+VRANGEPDZrmik 6140
+VRANGEPDZrmikz 6141
+VRANGEPDZrri 6142
+VRANGEPDZrrib 6143
+VRANGEPDZrribk 6144
+VRANGEPDZrribkz 6145
+VRANGEPDZrrik 6146
+VRANGEPDZrrikz 6147
+VRANGEPSZ 6148
+VRANGEPSZrmbi 6149
+VRANGEPSZrmbik 6150
+VRANGEPSZrmbikz 6151
+VRANGEPSZrmi 6152
+VRANGEPSZrmik 6153
+VRANGEPSZrmikz 6154
+VRANGEPSZrri 6155
+VRANGEPSZrrib 6156
+VRANGEPSZrribk 6157
+VRANGEPSZrribkz 6158
+VRANGEPSZrrik 6159
+VRANGEPSZrrikz 6160
+VRANGESDZrmi 6161
+VRANGESDZrmik 6162
+VRANGESDZrmikz 6163
+VRANGESDZrri 6164
+VRANGESDZrrib 6165
+VRANGESDZrribk 6166
+VRANGESDZrribkz 6167
+VRANGESDZrrik 6168
+VRANGESDZrrikz 6169
+VRANGESSZrmi 6170
+VRANGESSZrmik 6171
+VRANGESSZrmikz 6172
+VRANGESSZrri 6173
+VRANGESSZrrib 6174
+VRANGESSZrribk 6175
+VRANGESSZrribkz 6176
+VRANGESSZrrik 6177
+VRANGESSZrrikz 6178
+VRCP 6179
+VRCPBF 6180
+VRCPPHZ 6181
+VRCPPHZm 6182
+VRCPPHZmb 6183
+VRCPPHZmbk 6184
+VRCPPHZmbkz 6185
+VRCPPHZmk 6186
+VRCPPHZmkz 6187
+VRCPPHZr 6188
+VRCPPHZrk 6189
+VRCPPHZrkz 6190
+VRCPPSYm 6191
+VRCPPSYr 6192
+VRCPPSm 6193
+VRCPPSr 6194
+VRCPSHZrm 6195
+VRCPSHZrmk 6196
+VRCPSHZrmkz 6197
+VRCPSHZrr 6198
+VRCPSHZrrk 6199
+VRCPSHZrrkz 6200
+VRCPSSm 6201
+VRCPSSm_Int 6202
+VRCPSSr 6203
+VRCPSSr_Int 6204
+VREDUCEBF 6205
+VREDUCEPDZ 6206
+VREDUCEPDZrmbi 6207
+VREDUCEPDZrmbik 6208
+VREDUCEPDZrmbikz 6209
+VREDUCEPDZrmi 6210
+VREDUCEPDZrmik 6211
+VREDUCEPDZrmikz 6212
+VREDUCEPDZrri 6213
+VREDUCEPDZrrib 6214
+VREDUCEPDZrribk 6215
+VREDUCEPDZrribkz 6216
+VREDUCEPDZrrik 6217
+VREDUCEPDZrrikz 6218
+VREDUCEPHZ 6219
+VREDUCEPHZrmbi 6220
+VREDUCEPHZrmbik 6221
+VREDUCEPHZrmbikz 6222
+VREDUCEPHZrmi 6223
+VREDUCEPHZrmik 6224
+VREDUCEPHZrmikz 6225
+VREDUCEPHZrri 6226
+VREDUCEPHZrrib 6227
+VREDUCEPHZrribk 6228
+VREDUCEPHZrribkz 6229
+VREDUCEPHZrrik 6230
+VREDUCEPHZrrikz 6231
+VREDUCEPSZ 6232
+VREDUCEPSZrmbi 6233
+VREDUCEPSZrmbik 6234
+VREDUCEPSZrmbikz 6235
+VREDUCEPSZrmi 6236
+VREDUCEPSZrmik 6237
+VREDUCEPSZrmikz 6238
+VREDUCEPSZrri 6239
+VREDUCEPSZrrib 6240
+VREDUCEPSZrribk 6241
+VREDUCEPSZrribkz 6242
+VREDUCEPSZrrik 6243
+VREDUCEPSZrrikz 6244
+VREDUCESDZrmi 6245
+VREDUCESDZrmik 6246
+VREDUCESDZrmikz 6247
+VREDUCESDZrri 6248
+VREDUCESDZrrib 6249
+VREDUCESDZrribk 6250
+VREDUCESDZrribkz 6251
+VREDUCESDZrrik 6252
+VREDUCESDZrrikz 6253
+VREDUCESHZrmi 6254
+VREDUCESHZrmik 6255
+VREDUCESHZrmikz 6256
+VREDUCESHZrri 6257
+VREDUCESHZrrib 6258
+VREDUCESHZrribk 6259
+VREDUCESHZrribkz 6260
+VREDUCESHZrrik 6261
+VREDUCESHZrrikz 6262
+VREDUCESSZrmi 6263
+VREDUCESSZrmik 6264
+VREDUCESSZrmikz 6265
+VREDUCESSZrri 6266
+VREDUCESSZrrib 6267
+VREDUCESSZrribk 6268
+VREDUCESSZrribkz 6269
+VREDUCESSZrrik 6270
+VREDUCESSZrrikz 6271
+VRNDSCALEBF 6272
+VRNDSCALEPDZ 6273
+VRNDSCALEPDZrmbi 6274
+VRNDSCALEPDZrmbik 6275
+VRNDSCALEPDZrmbikz 6276
+VRNDSCALEPDZrmi 6277
+VRNDSCALEPDZrmik 6278
+VRNDSCALEPDZrmikz 6279
+VRNDSCALEPDZrri 6280
+VRNDSCALEPDZrrib 6281
+VRNDSCALEPDZrribk 6282
+VRNDSCALEPDZrribkz 6283
+VRNDSCALEPDZrrik 6284
+VRNDSCALEPDZrrikz 6285
+VRNDSCALEPHZ 6286
+VRNDSCALEPHZrmbi 6287
+VRNDSCALEPHZrmbik 6288
+VRNDSCALEPHZrmbikz 6289
+VRNDSCALEPHZrmi 6290
+VRNDSCALEPHZrmik 6291
+VRNDSCALEPHZrmikz 6292
+VRNDSCALEPHZrri 6293
+VRNDSCALEPHZrrib 6294
+VRNDSCALEPHZrribk 6295
+VRNDSCALEPHZrribkz 6296
+VRNDSCALEPHZrrik 6297
+VRNDSCALEPHZrrikz 6298
+VRNDSCALEPSZ 6299
+VRNDSCALEPSZrmbi 6300
+VRNDSCALEPSZrmbik 6301
+VRNDSCALEPSZrmbikz 6302
+VRNDSCALEPSZrmi 6303
+VRNDSCALEPSZrmik 6304
+VRNDSCALEPSZrmikz 6305
+VRNDSCALEPSZrri 6306
+VRNDSCALEPSZrrib 6307
+VRNDSCALEPSZrribk 6308
+VRNDSCALEPSZrribkz 6309
+VRNDSCALEPSZrrik 6310
+VRNDSCALEPSZrrikz 6311
+VRNDSCALESDZrmi 6312
+VRNDSCALESDZrmi_Int 6313
+VRNDSCALESDZrmik_Int 6314
+VRNDSCALESDZrmikz_Int 6315
+VRNDSCALESDZrri 6316
+VRNDSCALESDZrri_Int 6317
+VRNDSCALESDZrrib_Int 6318
+VRNDSCALESDZrribk_Int 6319
+VRNDSCALESDZrribkz_Int 6320
+VRNDSCALESDZrrik_Int 6321
+VRNDSCALESDZrrikz_Int 6322
+VRNDSCALESHZrmi 6323
+VRNDSCALESHZrmi_Int 6324
+VRNDSCALESHZrmik_Int 6325
+VRNDSCALESHZrmikz_Int 6326
+VRNDSCALESHZrri 6327
+VRNDSCALESHZrri_Int 6328
+VRNDSCALESHZrrib_Int 6329
+VRNDSCALESHZrribk_Int 6330
+VRNDSCALESHZrribkz_Int 6331
+VRNDSCALESHZrrik_Int 6332
+VRNDSCALESHZrrikz_Int 6333
+VRNDSCALESSZrmi 6334
+VRNDSCALESSZrmi_Int 6335
+VRNDSCALESSZrmik_Int 6336
+VRNDSCALESSZrmikz_Int 6337
+VRNDSCALESSZrri 6338
+VRNDSCALESSZrri_Int 6339
+VRNDSCALESSZrrib_Int 6340
+VRNDSCALESSZrribk_Int 6341
+VRNDSCALESSZrribkz_Int 6342
+VRNDSCALESSZrrik_Int 6343
+VRNDSCALESSZrrikz_Int 6344
+VROUNDPDYmi 6345
+VROUNDPDYri 6346
+VROUNDPDmi 6347
+VROUNDPDri 6348
+VROUNDPSYmi 6349
+VROUNDPSYri 6350
+VROUNDPSmi 6351
+VROUNDPSri 6352
+VROUNDSDmi 6353
+VROUNDSDmi_Int 6354
+VROUNDSDri 6355
+VROUNDSDri_Int 6356
+VROUNDSSmi 6357
+VROUNDSSmi_Int 6358
+VROUNDSSri 6359
+VROUNDSSri_Int 6360
+VRSQRT 6361
+VRSQRTBF 6362
+VRSQRTPHZ 6363
+VRSQRTPHZm 6364
+VRSQRTPHZmb 6365
+VRSQRTPHZmbk 6366
+VRSQRTPHZmbkz 6367
+VRSQRTPHZmk 6368
+VRSQRTPHZmkz 6369
+VRSQRTPHZr 6370
+VRSQRTPHZrk 6371
+VRSQRTPHZrkz 6372
+VRSQRTPSYm 6373
+VRSQRTPSYr 6374
+VRSQRTPSm 6375
+VRSQRTPSr 6376
+VRSQRTSHZrm 6377
+VRSQRTSHZrmk 6378
+VRSQRTSHZrmkz 6379
+VRSQRTSHZrr 6380
+VRSQRTSHZrrk 6381
+VRSQRTSHZrrkz 6382
+VRSQRTSSm 6383
+VRSQRTSSm_Int 6384
+VRSQRTSSr 6385
+VRSQRTSSr_Int 6386
+VSCALEFBF 6387
+VSCALEFPDZ 6388
+VSCALEFPDZrm 6389
+VSCALEFPDZrmb 6390
+VSCALEFPDZrmbk 6391
+VSCALEFPDZrmbkz 6392
+VSCALEFPDZrmk 6393
+VSCALEFPDZrmkz 6394
+VSCALEFPDZrr 6395
+VSCALEFPDZrrb 6396
+VSCALEFPDZrrbk 6397
+VSCALEFPDZrrbkz 6398
+VSCALEFPDZrrk 6399
+VSCALEFPDZrrkz 6400
+VSCALEFPHZ 6401
+VSCALEFPHZrm 6402
+VSCALEFPHZrmb 6403
+VSCALEFPHZrmbk 6404
+VSCALEFPHZrmbkz 6405
+VSCALEFPHZrmk 6406
+VSCALEFPHZrmkz 6407
+VSCALEFPHZrr 6408
+VSCALEFPHZrrb 6409
+VSCALEFPHZrrbk 6410
+VSCALEFPHZrrbkz 6411
+VSCALEFPHZrrk 6412
+VSCALEFPHZrrkz 6413
+VSCALEFPSZ 6414
+VSCALEFPSZrm 6415
+VSCALEFPSZrmb 6416
+VSCALEFPSZrmbk 6417
+VSCALEFPSZrmbkz 6418
+VSCALEFPSZrmk 6419
+VSCALEFPSZrmkz 6420
+VSCALEFPSZrr 6421
+VSCALEFPSZrrb 6422
+VSCALEFPSZrrbk 6423
+VSCALEFPSZrrbkz 6424
+VSCALEFPSZrrk 6425
+VSCALEFPSZrrkz 6426
+VSCALEFSDZrm 6427
+VSCALEFSDZrmk 6428
+VSCALEFSDZrmkz 6429
+VSCALEFSDZrr 6430
+VSCALEFSDZrrb_Int 6431
+VSCALEFSDZrrbk_Int 6432
+VSCALEFSDZrrbkz_Int 6433
+VSCALEFSDZrrk 6434
+VSCALEFSDZrrkz 6435
+VSCALEFSHZrm 6436
+VSCALEFSHZrmk 6437
+VSCALEFSHZrmkz 6438
+VSCALEFSHZrr 6439
+VSCALEFSHZrrb_Int 6440
+VSCALEFSHZrrbk_Int 6441
+VSCALEFSHZrrbkz_Int 6442
+VSCALEFSHZrrk 6443
+VSCALEFSHZrrkz 6444
+VSCALEFSSZrm 6445
+VSCALEFSSZrmk 6446
+VSCALEFSSZrmkz 6447
+VSCALEFSSZrr 6448
+VSCALEFSSZrrb_Int 6449
+VSCALEFSSZrrbk_Int 6450
+VSCALEFSSZrrbkz_Int 6451
+VSCALEFSSZrrk 6452
+VSCALEFSSZrrkz 6453
+VSCATTERDPDZ 6454
+VSCATTERDPDZmr 6455
+VSCATTERDPSZ 6456
+VSCATTERDPSZmr 6457
+VSCATTERPF 6458
+VSCATTERQPDZ 6459
+VSCATTERQPDZmr 6460
+VSCATTERQPSZ 6461
+VSCATTERQPSZmr 6462
+VSHA 6463
+VSHUFF 6464
+VSHUFI 6465
+VSHUFPDYrmi 6466
+VSHUFPDYrri 6467
+VSHUFPDZ 6468
+VSHUFPDZrmbi 6469
+VSHUFPDZrmbik 6470
+VSHUFPDZrmbikz 6471
+VSHUFPDZrmi 6472
+VSHUFPDZrmik 6473
+VSHUFPDZrmikz 6474
+VSHUFPDZrri 6475
+VSHUFPDZrrik 6476
+VSHUFPDZrrikz 6477
+VSHUFPDrmi 6478
+VSHUFPDrri 6479
+VSHUFPSYrmi 6480
+VSHUFPSYrri 6481
+VSHUFPSZ 6482
+VSHUFPSZrmbi 6483
+VSHUFPSZrmbik 6484
+VSHUFPSZrmbikz 6485
+VSHUFPSZrmi 6486
+VSHUFPSZrmik 6487
+VSHUFPSZrmikz 6488
+VSHUFPSZrri 6489
+VSHUFPSZrrik 6490
+VSHUFPSZrrikz 6491
+VSHUFPSrmi 6492
+VSHUFPSrri 6493
+VSM 6494
+VSQRTBF 6495
+VSQRTPDYm 6496
+VSQRTPDYr 6497
+VSQRTPDZ 6498
+VSQRTPDZm 6499
+VSQRTPDZmb 6500
+VSQRTPDZmbk 6501
+VSQRTPDZmbkz 6502
+VSQRTPDZmk 6503
+VSQRTPDZmkz 6504
+VSQRTPDZr 6505
+VSQRTPDZrb 6506
+VSQRTPDZrbk 6507
+VSQRTPDZrbkz 6508
+VSQRTPDZrk 6509
+VSQRTPDZrkz 6510
+VSQRTPDm 6511
+VSQRTPDr 6512
+VSQRTPHZ 6513
+VSQRTPHZm 6514
+VSQRTPHZmb 6515
+VSQRTPHZmbk 6516
+VSQRTPHZmbkz 6517
+VSQRTPHZmk 6518
+VSQRTPHZmkz 6519
+VSQRTPHZr 6520
+VSQRTPHZrb 6521
+VSQRTPHZrbk 6522
+VSQRTPHZrbkz 6523
+VSQRTPHZrk 6524
+VSQRTPHZrkz 6525
+VSQRTPSYm 6526
+VSQRTPSYr 6527
+VSQRTPSZ 6528
+VSQRTPSZm 6529
+VSQRTPSZmb 6530
+VSQRTPSZmbk 6531
+VSQRTPSZmbkz 6532
+VSQRTPSZmk 6533
+VSQRTPSZmkz 6534
+VSQRTPSZr 6535
+VSQRTPSZrb 6536
+VSQRTPSZrbk 6537
+VSQRTPSZrbkz 6538
+VSQRTPSZrk 6539
+VSQRTPSZrkz 6540
+VSQRTPSm 6541
+VSQRTPSr 6542
+VSQRTSDZm 6543
+VSQRTSDZm_Int 6544
+VSQRTSDZmk_Int 6545
+VSQRTSDZmkz_Int 6546
+VSQRTSDZr 6547
+VSQRTSDZr_Int 6548
+VSQRTSDZrb_Int 6549
+VSQRTSDZrbk_Int 6550
+VSQRTSDZrbkz_Int 6551
+VSQRTSDZrk_Int 6552
+VSQRTSDZrkz_Int 6553
+VSQRTSDm 6554
+VSQRTSDm_Int 6555
+VSQRTSDr 6556
+VSQRTSDr_Int 6557
+VSQRTSHZm 6558
+VSQRTSHZm_Int 6559
+VSQRTSHZmk_Int 6560
+VSQRTSHZmkz_Int 6561
+VSQRTSHZr 6562
+VSQRTSHZr_Int 6563
+VSQRTSHZrb_Int 6564
+VSQRTSHZrbk_Int 6565
+VSQRTSHZrbkz_Int 6566
+VSQRTSHZrk_Int 6567
+VSQRTSHZrkz_Int 6568
+VSQRTSSZm 6569
+VSQRTSSZm_Int 6570
+VSQRTSSZmk_Int 6571
+VSQRTSSZmkz_Int 6572
+VSQRTSSZr 6573
+VSQRTSSZr_Int 6574
+VSQRTSSZrb_Int 6575
+VSQRTSSZrbk_Int 6576
+VSQRTSSZrbkz_Int 6577
+VSQRTSSZrk_Int 6578
+VSQRTSSZrkz_Int 6579
+VSQRTSSm 6580
+VSQRTSSm_Int 6581
+VSQRTSSr 6582
+VSQRTSSr_Int 6583
+VSTMXCSR 6584
+VSUBBF 6585
+VSUBPDYrm 6586
+VSUBPDYrr 6587
+VSUBPDZ 6588
+VSUBPDZrm 6589
+VSUBPDZrmb 6590
+VSUBPDZrmbk 6591
+VSUBPDZrmbkz 6592
+VSUBPDZrmk 6593
+VSUBPDZrmkz 6594
+VSUBPDZrr 6595
+VSUBPDZrrb 6596
+VSUBPDZrrbk 6597
+VSUBPDZrrbkz 6598
+VSUBPDZrrk 6599
+VSUBPDZrrkz 6600
+VSUBPDrm 6601
+VSUBPDrr 6602
+VSUBPHZ 6603
+VSUBPHZrm 6604
+VSUBPHZrmb 6605
+VSUBPHZrmbk 6606
+VSUBPHZrmbkz 6607
+VSUBPHZrmk 6608
+VSUBPHZrmkz 6609
+VSUBPHZrr 6610
+VSUBPHZrrb 6611
+VSUBPHZrrbk 6612
+VSUBPHZrrbkz 6613
+VSUBPHZrrk 6614
+VSUBPHZrrkz 6615
+VSUBPSYrm 6616
+VSUBPSYrr 6617
+VSUBPSZ 6618
+VSUBPSZrm 6619
+VSUBPSZrmb 6620
+VSUBPSZrmbk 6621
+VSUBPSZrmbkz 6622
+VSUBPSZrmk 6623
+VSUBPSZrmkz 6624
+VSUBPSZrr 6625
+VSUBPSZrrb 6626
+VSUBPSZrrbk 6627
+VSUBPSZrrbkz 6628
+VSUBPSZrrk 6629
+VSUBPSZrrkz 6630
+VSUBPSrm 6631
+VSUBPSrr 6632
+VSUBSDZrm 6633
+VSUBSDZrm_Int 6634
+VSUBSDZrmk_Int 6635
+VSUBSDZrmkz_Int 6636
+VSUBSDZrr 6637
+VSUBSDZrr_Int 6638
+VSUBSDZrrb_Int 6639
+VSUBSDZrrbk_Int 6640
+VSUBSDZrrbkz_Int 6641
+VSUBSDZrrk_Int 6642
+VSUBSDZrrkz_Int 6643
+VSUBSDrm 6644
+VSUBSDrm_Int 6645
+VSUBSDrr 6646
+VSUBSDrr_Int 6647
+VSUBSHZrm 6648
+VSUBSHZrm_Int 6649
+VSUBSHZrmk_Int 6650
+VSUBSHZrmkz_Int 6651
+VSUBSHZrr 6652
+VSUBSHZrr_Int 6653
+VSUBSHZrrb_Int 6654
+VSUBSHZrrbk_Int 6655
+VSUBSHZrrbkz_Int 6656
+VSUBSHZrrk_Int 6657
+VSUBSHZrrkz_Int 6658
+VSUBSSZrm 6659
+VSUBSSZrm_Int 6660
+VSUBSSZrmk_Int 6661
+VSUBSSZrmkz_Int 6662
+VSUBSSZrr 6663
+VSUBSSZrr_Int 6664
+VSUBSSZrrb_Int 6665
+VSUBSSZrrbk_Int 6666
+VSUBSSZrrbkz_Int 6667
+VSUBSSZrrk_Int 6668
+VSUBSSZrrkz_Int 6669
+VSUBSSrm 6670
+VSUBSSrm_Int 6671
+VSUBSSrr 6672
+VSUBSSrr_Int 6673
+VTESTPDYrm 6674
+VTESTPDYrr 6675
+VTESTPDrm 6676
+VTESTPDrr 6677
+VTESTPSYrm 6678
+VTESTPSYrr 6679
+VTESTPSrm 6680
+VTESTPSrr 6681
+VUCOMISDZrm 6682
+VUCOMISDZrm_Int 6683
+VUCOMISDZrr 6684
+VUCOMISDZrr_Int 6685
+VUCOMISDZrrb 6686
+VUCOMISDrm 6687
+VUCOMISDrm_Int 6688
+VUCOMISDrr 6689
+VUCOMISDrr_Int 6690
+VUCOMISHZrm 6691
+VUCOMISHZrm_Int 6692
+VUCOMISHZrr 6693
+VUCOMISHZrr_Int 6694
+VUCOMISHZrrb 6695
+VUCOMISSZrm 6696
+VUCOMISSZrm_Int 6697
+VUCOMISSZrr 6698
+VUCOMISSZrr_Int 6699
+VUCOMISSZrrb 6700
+VUCOMISSrm 6701
+VUCOMISSrm_Int 6702
+VUCOMISSrr 6703
+VUCOMISSrr_Int 6704
+VUCOMXSDZrm 6705
+VUCOMXSDZrm_Int 6706
+VUCOMXSDZrr 6707
+VUCOMXSDZrr_Int 6708
+VUCOMXSDZrrb_Int 6709
+VUCOMXSHZrm 6710
+VUCOMXSHZrm_Int 6711
+VUCOMXSHZrr 6712
+VUCOMXSHZrr_Int 6713
+VUCOMXSHZrrb_Int 6714
+VUCOMXSSZrm 6715
+VUCOMXSSZrm_Int 6716
+VUCOMXSSZrr 6717
+VUCOMXSSZrr_Int 6718
+VUCOMXSSZrrb_Int 6719
+VUNPCKHPDYrm 6720
+VUNPCKHPDYrr 6721
+VUNPCKHPDZ 6722
+VUNPCKHPDZrm 6723
+VUNPCKHPDZrmb 6724
+VUNPCKHPDZrmbk 6725
+VUNPCKHPDZrmbkz 6726
+VUNPCKHPDZrmk 6727
+VUNPCKHPDZrmkz 6728
+VUNPCKHPDZrr 6729
+VUNPCKHPDZrrk 6730
+VUNPCKHPDZrrkz 6731
+VUNPCKHPDrm 6732
+VUNPCKHPDrr 6733
+VUNPCKHPSYrm 6734
+VUNPCKHPSYrr 6735
+VUNPCKHPSZ 6736
+VUNPCKHPSZrm 6737
+VUNPCKHPSZrmb 6738
+VUNPCKHPSZrmbk 6739
+VUNPCKHPSZrmbkz 6740
+VUNPCKHPSZrmk 6741
+VUNPCKHPSZrmkz 6742
+VUNPCKHPSZrr 6743
+VUNPCKHPSZrrk 6744
+VUNPCKHPSZrrkz 6745
+VUNPCKHPSrm 6746
+VUNPCKHPSrr 6747
+VUNPCKLPDYrm 6748
+VUNPCKLPDYrr 6749
+VUNPCKLPDZ 6750
+VUNPCKLPDZrm 6751
+VUNPCKLPDZrmb 6752
+VUNPCKLPDZrmbk 6753
+VUNPCKLPDZrmbkz 6754
+VUNPCKLPDZrmk 6755
+VUNPCKLPDZrmkz 6756
+VUNPCKLPDZrr 6757
+VUNPCKLPDZrrk 6758
+VUNPCKLPDZrrkz 6759
+VUNPCKLPDrm 6760
+VUNPCKLPDrr 6761
+VUNPCKLPSYrm 6762
+VUNPCKLPSYrr 6763
+VUNPCKLPSZ 6764
+VUNPCKLPSZrm 6765
+VUNPCKLPSZrmb 6766
+VUNPCKLPSZrmbk 6767
+VUNPCKLPSZrmbkz 6768
+VUNPCKLPSZrmk 6769
+VUNPCKLPSZrmkz 6770
+VUNPCKLPSZrr 6771
+VUNPCKLPSZrrk 6772
+VUNPCKLPSZrrkz 6773
+VUNPCKLPSrm 6774
+VUNPCKLPSrr 6775
+VXORPDYrm 6776
+VXORPDYrr 6777
+VXORPDZ 6778
+VXORPDZrm 6779
+VXORPDZrmb 6780
+VXORPDZrmbk 6781
+VXORPDZrmbkz 6782
+VXORPDZrmk 6783
+VXORPDZrmkz 6784
+VXORPDZrr 6785
+VXORPDZrrk 6786
+VXORPDZrrkz 6787
+VXORPDrm 6788
+VXORPDrr 6789
+VXORPSYrm 6790
+VXORPSYrr 6791
+VXORPSZ 6792
+VXORPSZrm 6793
+VXORPSZrmb 6794
+VXORPSZrmbk 6795
+VXORPSZrmbkz 6796
+VXORPSZrmk 6797
+VXORPSZrmkz 6798
+VXORPSZrr 6799
+VXORPSZrrk 6800
+VXORPSZrrkz 6801
+VXORPSrm 6802
+VXORPSrr 6803
+VZEROALL 6804
+VZEROUPPER 6805
+V_SET 6806
+V_SETALLONES 6807
+WAIT 6808
+WBINVD 6809
+WBNOINVD 6810
+WRFLAGS 6811
+WRFSBASE 6812
+WRGSBASE 6813
+WRMSR 6814
+WRMSRLIST 6815
+WRMSRNS 6816
+WRMSRNSir 6817
+WRMSRNSir_EVEX 6818
+WRPKRUr 6819
+WRSSD 6820
+WRSSD_EVEX 6821
+WRSSQ 6822
+WRSSQ_EVEX 6823
+WRUSSD 6824
+WRUSSD_EVEX 6825
+WRUSSQ 6826
+WRUSSQ_EVEX 6827
+XABORT 6828
+XABORT_DEF 6829
+XACQUIRE_PREFIX 6830
+XADD 6831
+XAM_F 6832
+XAM_Fp 6833
+XBEGIN 6834
+XCHG 6835
+XCH_F 6836
+XCRYPTCBC 6837
+XCRYPTCFB 6838
+XCRYPTCTR 6839
+XCRYPTECB 6840
+XCRYPTOFB 6841
+XEND 6842
+XGETBV 6843
+XLAT 6844
+XOR 6845
+XORPDrm 6846
+XORPDrr 6847
+XORPSrm 6848
+XORPSrr 6849
+XRELEASE_PREFIX 6850
+XRESLDTRK 6851
+XRSTOR 6852
+XRSTORS 6853
+XSAVE 6854
+XSAVEC 6855
+XSAVEOPT 6856
+XSAVES 6857
+XSETBV 6858
+XSHA 6859
+XSTORE 6860
+XSUSLDTRK 6861
+XTEST 6862
+Immediate 6863
+CImmediate 6864
+FPImmediate 6865
+MBB 6866
+FrameIndex 6867
+ConstantPoolIndex 6868
+TargetIndex 6869
+JumpTableIndex 6870
+ExternalSymbol 6871
+GlobalAddress 6872
+BlockAddress 6873
+RegisterMask 6874
+RegisterLiveOut 6875
+Metadata 6876
+MCSymbol 6877
+CFIIndex 6878
+IntrinsicID 6879
+Predicate 6880
+ShuffleMask 6881
+PhyReg_GR8 6882
+PhyReg_GRH8 6883
+PhyReg_GR8_NOREX2 6884
+PhyReg_GR8_NOREX 6885
+PhyReg_GR8_ABCD_H 6886
+PhyReg_GR8_ABCD_L 6887
+PhyReg_GRH16 6888
+PhyReg_GR16 6889
+PhyReg_GR16_NOREX2 6890
+PhyReg_GR16_NOREX 6891
+PhyReg_VK1 6892
+PhyReg_VK16 6893
+PhyReg_VK2 6894
+PhyReg_VK4 6895
+PhyReg_VK8 6896
+PhyReg_VK16WM 6897
+PhyReg_VK1WM 6898
+PhyReg_VK2WM 6899
+PhyReg_VK4WM 6900
+PhyReg_VK8WM 6901
+PhyReg_SEGMENT_REG 6902
+PhyReg_GR16_ABCD 6903
+PhyReg_FPCCR 6904
+PhyReg_FR16X 6905
+PhyReg_FR16 6906
+PhyReg_VK16PAIR 6907
+PhyReg_VK1PAIR 6908
+PhyReg_VK2PAIR 6909
+PhyReg_VK4PAIR 6910
+PhyReg_VK8PAIR 6911
+PhyReg_VK1PAIR_with_sub_mask_0_in_VK1WM 6912
+PhyReg_LOW32_ADDR_ACCESS_RBP 6913
+PhyReg_LOW32_ADDR_ACCESS 6914
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 6915
+PhyReg_FR32X 6916
+PhyReg_GR32 6917
+PhyReg_GR32_NOSP 6918
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 6919
+PhyReg_DEBUG_REG 6920
+PhyReg_FR32 6921
+PhyReg_GR32_NOREX2 6922
+PhyReg_GR32_NOREX2_NOSP 6923
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 6924
+PhyReg_GR32_NOREX 6925
+PhyReg_VK32 6926
+PhyReg_GR32_NOREX_NOSP 6927
+PhyReg_RFP32 6928
+PhyReg_VK32WM 6929
+PhyReg_GR32_ABCD 6930
+PhyReg_GR32_TC 6931
+PhyReg_GR32_ABCD_and_GR32_TC 6932
+PhyReg_GR32_AD 6933
+PhyReg_GR32_ArgRef 6934
+PhyReg_GR32_BPSP 6935
+PhyReg_GR32_BSI 6936
+PhyReg_GR32_CB 6937
+PhyReg_GR32_DC 6938
+PhyReg_GR32_DIBP 6939
+PhyReg_GR32_SIDI 6940
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 6941
+PhyReg_CCR 6942
+PhyReg_DFCCR 6943
+PhyReg_GR32_ABCD_and_GR32_BSI 6944
+PhyReg_GR32_AD_and_GR32_ArgRef 6945
+PhyReg_GR32_ArgRef_and_GR32_CB 6946
+PhyReg_GR32_BPSP_and_GR32_DIBP 6947
+PhyReg_GR32_BPSP_and_GR32_TC 6948
+PhyReg_GR32_BSI_and_GR32_SIDI 6949
+PhyReg_GR32_DIBP_and_GR32_SIDI 6950
+PhyReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 6951
+PhyReg_LOW32_ADDR_ACCESS_with_sub_32bit 6952
+PhyReg_RFP64 6953
+PhyReg_GR64 6954
+PhyReg_FR64X 6955
+PhyReg_GR64_with_sub_8bit 6956
+PhyReg_GR64_NOSP 6957
+PhyReg_GR64_NOREX2 6958
+PhyReg_CONTROL_REG 6959
+PhyReg_FR64 6960
+PhyReg_GR64_with_sub_16bit_in_GR16_NOREX2 6961
+PhyReg_GR64_NOREX2_NOSP 6962
+PhyReg_GR64PLTSafe 6963
+PhyReg_GR64_TC 6964
+PhyReg_GR64_NOREX 6965
+PhyReg_GR64_TCW64 6966
+PhyReg_GR64_TC_with_sub_8bit 6967
+PhyReg_GR64_NOREX2_NOSP_and_GR64_TC 6968
+PhyReg_GR64_TCW64_with_sub_8bit 6969
+PhyReg_GR64_TC_and_GR64_TCW64 6970
+PhyReg_GR64_with_sub_16bit_in_GR16_NOREX 6971
+PhyReg_VK64 6972
+PhyReg_VR64 6973
+PhyReg_GR64PLTSafe_and_GR64_TC 6974
+PhyReg_GR64_NOREX2_NOSP_and_GR64_TCW64 6975
+PhyReg_GR64_NOREX_NOSP 6976
+PhyReg_GR64_NOREX_and_GR64_TC 6977
+PhyReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 6978
+PhyReg_VK64WM 6979
+PhyReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 6980
+PhyReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 6981
+PhyReg_GR64PLTSafe_and_GR64_TCW64 6982
+PhyReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 6983
+PhyReg_GR64_NOREX_and_GR64_TCW64 6984
+PhyReg_GR64_ABCD 6985
+PhyReg_GR64_with_sub_32bit_in_GR32_TC 6986
+PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 6987
+PhyReg_GR64_AD 6988
+PhyReg_GR64_ArgRef 6989
+PhyReg_GR64_and_LOW32_ADDR_ACCESS_RBP 6990
+PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef 6991
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP 6992
+PhyReg_GR64_with_sub_32bit_in_GR32_BSI 6993
+PhyReg_GR64_with_sub_32bit_in_GR32_CB 6994
+PhyReg_GR64_with_sub_32bit_in_GR32_DIBP 6995
+PhyReg_GR64_with_sub_32bit_in_GR32_SIDI 6996
+PhyReg_GR64_A 6997
+PhyReg_GR64_ArgRef_and_GR64_TC 6998
+PhyReg_GR64_and_LOW32_ADDR_ACCESS 6999
+PhyReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7000
+PhyReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7001
+PhyReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7002
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7003
+PhyReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7004
+PhyReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7005
+PhyReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7006
+PhyReg_RST 7007
+PhyReg_RFP80 7008
+PhyReg_RFP80_7 7009
+PhyReg_VR128X 7010
+PhyReg_VR128 7011
+PhyReg_VR256X 7012
+PhyReg_VR256 7013
+PhyReg_VR512 7014
+PhyReg_VR512_0_15 7015
+PhyReg_TILE 7016
+VirtReg_GR8 7017
+VirtReg_GRH8 7018
+VirtReg_GR8_NOREX2 7019
+VirtReg_GR8_NOREX 7020
+VirtReg_GR8_ABCD_H 7021
+VirtReg_GR8_ABCD_L 7022
+VirtReg_GRH16 7023
+VirtReg_GR16 7024
+VirtReg_GR16_NOREX2 7025
+VirtReg_GR16_NOREX 7026
+VirtReg_VK1 7027
+VirtReg_VK16 7028
+VirtReg_VK2 7029
+VirtReg_VK4 7030
+VirtReg_VK8 7031
+VirtReg_VK16WM 7032
+VirtReg_VK1WM 7033
+VirtReg_VK2WM 7034
+VirtReg_VK4WM 7035
+VirtReg_VK8WM 7036
+VirtReg_SEGMENT_REG 7037
+VirtReg_GR16_ABCD 7038
+VirtReg_FPCCR 7039
+VirtReg_FR16X 7040
+VirtReg_FR16 7041
+VirtReg_VK16PAIR 7042
+VirtReg_VK1PAIR 7043
+VirtReg_VK2PAIR 7044
+VirtReg_VK4PAIR 7045
+VirtReg_VK8PAIR 7046
+VirtReg_VK1PAIR_with_sub_mask_0_in_VK1WM 7047
+VirtReg_LOW32_ADDR_ACCESS_RBP 7048
+VirtReg_LOW32_ADDR_ACCESS 7049
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit 7050
+VirtReg_FR32X 7051
+VirtReg_GR32 7052
+VirtReg_GR32_NOSP 7053
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX2 7054
+VirtReg_DEBUG_REG 7055
+VirtReg_FR32 7056
+VirtReg_GR32_NOREX2 7057
+VirtReg_GR32_NOREX2_NOSP 7058
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_16bit_in_GR16_NOREX 7059
+VirtReg_GR32_NOREX 7060
+VirtReg_VK32 7061
+VirtReg_GR32_NOREX_NOSP 7062
+VirtReg_RFP32 7063
+VirtReg_VK32WM 7064
+VirtReg_GR32_ABCD 7065
+VirtReg_GR32_TC 7066
+VirtReg_GR32_ABCD_and_GR32_TC 7067
+VirtReg_GR32_AD 7068
+VirtReg_GR32_ArgRef 7069
+VirtReg_GR32_BPSP 7070
+VirtReg_GR32_BSI 7071
+VirtReg_GR32_CB 7072
+VirtReg_GR32_DC 7073
+VirtReg_GR32_DIBP 7074
+VirtReg_GR32_SIDI 7075
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_32bit 7076
+VirtReg_CCR 7077
+VirtReg_DFCCR 7078
+VirtReg_GR32_ABCD_and_GR32_BSI 7079
+VirtReg_GR32_AD_and_GR32_ArgRef 7080
+VirtReg_GR32_ArgRef_and_GR32_CB 7081
+VirtReg_GR32_BPSP_and_GR32_DIBP 7082
+VirtReg_GR32_BPSP_and_GR32_TC 7083
+VirtReg_GR32_BSI_and_GR32_SIDI 7084
+VirtReg_GR32_DIBP_and_GR32_SIDI 7085
+VirtReg_LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit 7086
+VirtReg_LOW32_ADDR_ACCESS_with_sub_32bit 7087
+VirtReg_RFP64 7088
+VirtReg_GR64 7089
+VirtReg_FR64X 7090
+VirtReg_GR64_with_sub_8bit 7091
+VirtReg_GR64_NOSP 7092
+VirtReg_GR64_NOREX2 7093
+VirtReg_CONTROL_REG 7094
+VirtReg_FR64 7095
+VirtReg_GR64_with_sub_16bit_in_GR16_NOREX2 7096
+VirtReg_GR64_NOREX2_NOSP 7097
+VirtReg_GR64PLTSafe 7098
+VirtReg_GR64_TC 7099
+VirtReg_GR64_NOREX 7100
+VirtReg_GR64_TCW64 7101
+VirtReg_GR64_TC_with_sub_8bit 7102
+VirtReg_GR64_NOREX2_NOSP_and_GR64_TC 7103
+VirtReg_GR64_TCW64_with_sub_8bit 7104
+VirtReg_GR64_TC_and_GR64_TCW64 7105
+VirtReg_GR64_with_sub_16bit_in_GR16_NOREX 7106
+VirtReg_VK64 7107
+VirtReg_VR64 7108
+VirtReg_GR64PLTSafe_and_GR64_TC 7109
+VirtReg_GR64_NOREX2_NOSP_and_GR64_TCW64 7110
+VirtReg_GR64_NOREX_NOSP 7111
+VirtReg_GR64_NOREX_and_GR64_TC 7112
+VirtReg_GR64_TCW64_and_GR64_TC_with_sub_8bit 7113
+VirtReg_VK64WM 7114
+VirtReg_GR64_TC_and_GR64_NOREX2_NOSP_and_GR64_TCW64 7115
+VirtReg_GR64_TC_and_GR64_with_sub_16bit_in_GR16_NOREX 7116
+VirtReg_GR64PLTSafe_and_GR64_TCW64 7117
+VirtReg_GR64_NOREX_and_GR64PLTSafe_and_GR64_TC 7118
+VirtReg_GR64_NOREX_and_GR64_TCW64 7119
+VirtReg_GR64_ABCD 7120
+VirtReg_GR64_with_sub_32bit_in_GR32_TC 7121
+VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_TC 7122
+VirtReg_GR64_AD 7123
+VirtReg_GR64_ArgRef 7124
+VirtReg_GR64_and_LOW32_ADDR_ACCESS_RBP 7125
+VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef 7126
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP 7127
+VirtReg_GR64_with_sub_32bit_in_GR32_BSI 7128
+VirtReg_GR64_with_sub_32bit_in_GR32_CB 7129
+VirtReg_GR64_with_sub_32bit_in_GR32_DIBP 7130
+VirtReg_GR64_with_sub_32bit_in_GR32_SIDI 7131
+VirtReg_GR64_A 7132
+VirtReg_GR64_ArgRef_and_GR64_TC 7133
+VirtReg_GR64_and_LOW32_ADDR_ACCESS 7134
+VirtReg_GR64_with_sub_32bit_in_GR32_ABCD_and_GR32_BSI 7135
+VirtReg_GR64_with_sub_32bit_in_GR32_AD_and_GR32_ArgRef 7136
+VirtReg_GR64_with_sub_32bit_in_GR32_ArgRef_and_GR32_CB 7137
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_DIBP 7138
+VirtReg_GR64_with_sub_32bit_in_GR32_BPSP_and_GR32_TC 7139
+VirtReg_GR64_with_sub_32bit_in_GR32_BSI_and_GR32_SIDI 7140
+VirtReg_GR64_with_sub_32bit_in_GR32_DIBP_and_GR32_SIDI 7141
+VirtReg_RST 7142
+VirtReg_RFP80 7143
+VirtReg_RFP80_7 7144
+VirtReg_VR128X 7145
+VirtReg_VR128 7146
+VirtReg_VR256X 7147
+VirtReg_VR256 7148
+VirtReg_VR512 7149
+VirtReg_VR512_0_15 7150
+VirtReg_TILE 7151
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-reduction.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-reduction.s
index 3d7a67d8ba16..621cad6e121a 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-reduction.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-reduction.s
@@ -630,593 +630,593 @@ vfwredusum.vs v8, v8, v8
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDAND_VS vredand.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDAND_VS vredand.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAXU_VS vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAXU_VS vredmaxu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMAX_VS vredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMAX_VS vredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMINU_VS vredminu.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMINU_VS vredminu.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDMIN_VS vredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDMIN_VS vredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDOR_VS vredor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDOR_VS vredor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDSUM_VS vredsum.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDSUM_VS vredsum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VREDXOR_VS vredxor.vs v8, v8, v8
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VREDXOR_VS vredxor.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUMU_VS vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUMU_VS vwredsumu.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 1.00 5 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 5 2.00 5 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 7 2.00 7 SMX60_VIEU[2] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 11 4.00 11 SMX60_VIEU[4] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 19 10.00 19 SMX60_VIEU[10] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VWREDSUM_VS vwredsum.vs v8, v16, v24
+# CHECK-NEXT: 1 35 35.00 35 SMX60_VIEU[35] VWREDSUM_VS vwredsum.vs v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMAX_VS vfredmax.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMAX_VS vfredmax.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDMIN_VS vfredmin.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDMIN_VS vfredmin.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 24 20.00 24 SMX60_VFP[20] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 48 24.00 48 SMX60_VFP[24] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 96 48.00 96 SMX60_VFP[48] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 192 96.00 192 SMX60_VFP[96] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 384 384.00 384 SMX60_VFP[384] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 24 12.00 24 SMX60_VFP[12] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 48 24.00 48 SMX60_VFP[24] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 96 48.00 96 SMX60_VFP[48] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 192 192.00 192 SMX60_VFP[192] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 6.00 12 SMX60_VFP[6] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 24 12.00 24 SMX60_VFP[12] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 48 24.00 48 SMX60_VFP[24] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDOSUM_VS vfredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 96 96.00 96 SMX60_VFP[96] VFREDOSUM_VS vfredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 12 8.00 12 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 15 8.00 15 SMX60_VFP[8] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 21 14.00 21 SMX60_VFP[14] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 33 20.00 33 SMX60_VFP[20] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFREDUSUM_VS vfredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 57 57.00 57 SMX60_VFP[57] VFREDUSUM_VS vfredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 32 27.00 32 SMX60_VFP[27] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 16 11.00 16 SMX60_VFP[11] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 64 32.00 64 SMX60_VFP[32] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 128 64.00 128 SMX60_VFP[64] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 256 128.00 256 SMX60_VFP[128] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 512 512.00 512 SMX60_VFP[512] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 16 11.00 16 SMX60_VFP[11] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 32 16.00 32 SMX60_VFP[16] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 64 32.00 64 SMX60_VFP[32] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 128 64.00 128 SMX60_VFP[64] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: 1 256 256.00 256 SMX60_VFP[256] VFWREDOSUM_VS vfwredosum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 32 27.00 32 SMX60_VFP[27] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 16 11.00 16 SMX60_VFP[11] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 64 32.00 64 SMX60_VFP[32] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 128 64.00 128 SMX60_VFP[64] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 256 128.00 256 SMX60_VFP[128] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 512 512.00 512 SMX60_VFP[512] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 16 11.00 16 SMX60_VFP[11] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 32 16.00 32 SMX60_VFP[16] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 64 32.00 64 SMX60_VFP[32] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 128 64.00 128 SMX60_VFP[64] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: 1 256 256.00 256 SMX60_VFP[256] VFWREDUSUM_VS vfwredusum.vs v8, v8, v8
# CHECK: Resources:
# CHECK-NEXT: [0] - SMX60_FP
@@ -1230,595 +1230,595 @@ vfwredusum.vs v8, v8, v8
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6]
-# CHECK-NEXT: - 294.00 - - - 82.00 212.00 -
+# CHECK-NEXT: - 294.00 - - - 4271.00 2028.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredand.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredand.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmaxu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmaxu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredminu.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredminu.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredsum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredsum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 2.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 4.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 10.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vredxor.vs v8, v8, v8
+# CHECK-NEXT: - - - - - - 35.00 - vredxor.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsumu.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsumu.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 2.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 4.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 10.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - - 1.00 - vwredsum.vs v8, v16, v24
+# CHECK-NEXT: - - - - - - 35.00 - vwredsum.vs v8, v16, v24
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmax.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmax.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredmin.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredmin.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 24.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 48.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 96.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 384.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 12.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 24.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 48.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 192.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 6.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 12.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 24.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 96.00 - - vfredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 8.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 14.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 20.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 57.00 - - vfredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 27.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 11.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 32.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 64.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 128.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 512.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 11.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 16.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 32.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 64.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredosum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 256.00 - - vfwredosum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 27.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 11.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 32.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 64.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 128.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 512.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 11.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 16.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 32.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 64.00 - - vfwredusum.vs v8, v8, v8
# CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: - - - - - 1.00 - - vfwredusum.vs v8, v8, v8
+# CHECK-NEXT: - - - - - 256.00 - - vfwredusum.vs v8, v8, v8
diff --git a/llvm/test/tools/llvm-profdata/profile-version.test b/llvm/test/tools/llvm-profdata/profile-version.test
index cb68a648d5e5..e811699ac63e 100644
--- a/llvm/test/tools/llvm-profdata/profile-version.test
+++ b/llvm/test/tools/llvm-profdata/profile-version.test
@@ -2,7 +2,7 @@ Test the profile version.
RUN: llvm-profdata merge -o %t.profdata %p/Inputs/basic.proftext
RUN: llvm-profdata show --profile-version %t.profdata | FileCheck %s
-CHECK: Profile version: 12
+CHECK: Profile version: 13
RUN: llvm-profdata merge -o %t.prev.profdata %p/Inputs/basic.proftext --write-prev-version
RUN: llvm-profdata show --profile-version %t.prev.profdata | FileCheck %s --check-prefix=PREV
diff --git a/llvm/tools/llvm-cas/CMakeLists.txt b/llvm/tools/llvm-cas/CMakeLists.txt
new file mode 100644
index 000000000000..e9d40cb49e01
--- /dev/null
+++ b/llvm/tools/llvm-cas/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(LLVM_TARGET_DEFINITIONS Options.td)
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+add_public_tablegen_target(LLVMCASToolTableGen)
+
+set(LLVM_LINK_COMPONENTS
+ Support
+ CAS
+ Option
+ )
+
+add_llvm_tool(llvm-cas
+ llvm-cas.cpp
+
+ DEPENDS
+ ${tablegen_deps}
+ LLVMCASToolTableGen
+ )
diff --git a/llvm/tools/llvm-cas/Options.td b/llvm/tools/llvm-cas/Options.td
new file mode 100644
index 000000000000..5ae64c104fdb
--- /dev/null
+++ b/llvm/tools/llvm-cas/Options.td
@@ -0,0 +1,63 @@
+include "llvm/Option/OptParser.td"
+
+class F<string name> : Flag<["--", "-"], name>;
+
+def grp_action : OptionGroup<"Actions">, HelpText<"llvm-cas actions">;
+
+def help : F<"help">, HelpText<"Prints this help output">;
+def : Flag<["-"], "h">, Alias<help>, HelpText<"Alias for --help">;
+
+// Tool actions
+
+def cas_dump : F<"dump">, HelpText<"Dump internal contents">, Group<grp_action>;
+def cat_node_data : F<"cat-node-data">,
+ HelpText<"Cat node data">,
+ Group<grp_action>;
+def make_blob : F<"make-blob">, HelpText<"Make blob">, Group<grp_action>;
+def make_node : F<"make-node">, HelpText<"Make node">, Group<grp_action>;
+def ls_node_refs : F<"ls-node-refs">,
+ HelpText<"List node refs">,
+ Group<grp_action>;
+def import : F<"import">,
+ HelpText<"Import objects from another CAS">,
+ Group<grp_action>;
+def put_cache_key : F<"put-cache-key">,
+ HelpText<"Set a value for a cache key">,
+ Group<grp_action>;
+def get_cache_result : F<"get-cache-result">,
+ HelpText<"Get the result value from a cache key">,
+ Group<grp_action>;
+def validate : F<"validate">,
+ HelpText<"Validate ObjectStore">,
+ Group<grp_action>;
+def validate_object : F<"validate-object">,
+ HelpText<"Validate the object for CASID">,
+ Group<grp_action>;
+def validate_if_needed : F<"validate-if-needed">,
+ HelpText<"Validate cas contents if needed">,
+ Group<grp_action>;
+def prune : F<"prune">, HelpText<"Prune local cas storage">, Group<grp_action>;
+
+// Tool options
+
+def cas_path : Separate<["-", "--"], "cas">,
+ MetaVarName<"<path>">,
+ HelpText<"Path to CAS on disk">;
+
+def upstream_cas : Separate<["-", "--"], "upstream-cas">,
+ MetaVarName<"<path>">,
+ HelpText<"Path to another upstream CAS">;
+
+def data : Separate<["-", "--"], "data">,
+ MetaVarName<"<path>">,
+ HelpText<"Path to data or '-' for stdin">;
+
+def check_hash : F<"check-hash">,
+ HelpText<"Check all hashes during validation">;
+
+def allow_recovery : F<"allow-recovery">,
+ HelpText<"Allow recovery of CAS data">;
+
+def force : F<"force">, HelpText<"Force validation even if unnecessary">;
+
+def in_process : F<"in-process">, HelpText<"Validation in-process">;
diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp
new file mode 100644
index 000000000000..e72ee470d231
--- /dev/null
+++ b/llvm/tools/llvm-cas/llvm-cas.cpp
@@ -0,0 +1,405 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file A utility for operating on LLVM CAS.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+namespace {
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "Options.inc"
+#undef OPTION
+};
+
+#define OPTTABLE_STR_TABLE_CODE
+#include "Options.inc"
+#undef OPTTABLE_STR_TABLE_CODE
+
+#define OPTTABLE_PREFIXES_TABLE_CODE
+#include "Options.inc"
+#undef OPTTABLE_PREFIXES_TABLE_CODE
+
+using namespace llvm::opt;
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "Options.inc"
+#undef OPTION
+};
+
+class LLVMCASOptTable : public opt::GenericOptTable {
+public:
+ LLVMCASOptTable()
+ : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
+};
+
+enum class CommandKind {
+ Invalid,
+ Dump,
+ CatNodeData,
+ MakeBlob,
+ MakeNode,
+ ListObjectReferences,
+ Import,
+ PutCacheKey,
+ GetCacheResult,
+ Validate,
+ ValidateObject,
+ ValidateIfNeeded,
+ Prune,
+};
+
+struct CommandOptions {
+ CommandKind Command = CommandKind::Invalid;
+ std::vector<std::string> Inputs;
+ std::string CASPath;
+ std::string UpstreamCASPath;
+ std::string DataPath;
+ bool CheckHash;
+ bool AllowRecovery;
+ bool Force;
+ bool InProcess;
+
+ static CommandKind getCommandKind(opt::Arg &A) {
+ switch (A.getOption().getID()) {
+ case OPT_cas_dump:
+ return CommandKind::Dump;
+ case OPT_cat_node_data:
+ return CommandKind::CatNodeData;
+ case OPT_make_blob:
+ return CommandKind::MakeBlob;
+ case OPT_make_node:
+ return CommandKind::MakeNode;
+ case OPT_ls_node_refs:
+ return CommandKind::ListObjectReferences;
+ case OPT_import:
+ return CommandKind::Import;
+ case OPT_put_cache_key:
+ return CommandKind::PutCacheKey;
+ case OPT_get_cache_result:
+ return CommandKind::GetCacheResult;
+ case OPT_validate:
+ return CommandKind::Validate;
+ case OPT_validate_object:
+ return CommandKind::ValidateObject;
+ case OPT_validate_if_needed:
+ return CommandKind::ValidateIfNeeded;
+ case OPT_prune:
+ return CommandKind::Prune;
+ }
+ return CommandKind::Invalid;
+ }
+
+ // Command requires input.
+ static bool requiresInput(CommandKind Kind) {
+ return Kind != CommandKind::ValidateIfNeeded &&
+ Kind != CommandKind::Validate && Kind != CommandKind::MakeBlob &&
+ Kind != CommandKind::MakeNode && Kind != CommandKind::Dump &&
+ Kind != CommandKind::Prune;
+ }
+};
+} // namespace
+
+static int dump(ObjectStore &CAS);
+static int listObjectReferences(ObjectStore &CAS, const CASID &ID);
+static int catNodeData(ObjectStore &CAS, const CASID &ID);
+static int makeBlob(ObjectStore &CAS, StringRef DataPath);
+static int makeNode(ObjectStore &CAS, ArrayRef<std::string> References,
+ StringRef DataPath);
+static int import(ObjectStore &FromCAS, ObjectStore &ToCAS,
+ ArrayRef<std::string> Objects);
+static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
+ ArrayRef<std::string> Objects);
+static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID);
+static int validateObject(ObjectStore &CAS, const CASID &ID);
+static int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash);
+static int validateIfNeeded(StringRef Path, bool CheckHash, bool Force,
+ bool AllowRecovery, bool InProcess,
+ const char *Argv0);
+static int prune(cas::ObjectStore &CAS);
+
+static Expected<CommandOptions> parseOptions(int Argc, char **Argv) {
+ BumpPtrAllocator Alloc;
+ StringSaver Saver(Alloc);
+ SmallVector<const char *> ExpanedArgs;
+ if (!cl::expandResponseFiles(Argc, Argv, nullptr, Saver, ExpanedArgs))
+ return createStringError("cannot expand response file");
+
+ LLVMCASOptTable T;
+ unsigned MI, MC;
+ opt::InputArgList Args = T.ParseArgs(ExpanedArgs, MI, MC);
+
+ for (auto *Arg : Args.filtered(OPT_UNKNOWN)) {
+ llvm::errs() << "ignoring unknown option: " << Arg->getSpelling() << '\n';
+ }
+
+ if (Args.hasArg(OPT_help)) {
+ T.printHelp(
+ outs(),
+ (std::string(Argv[0]) + " [action] [options] <input files>").c_str(),
+ "llvm-cas tool that performs CAS actions.", false);
+ exit(0);
+ }
+
+ CommandOptions Opts;
+ for (auto *A : Args.filtered(OPT_grp_action))
+ Opts.Command = CommandOptions::getCommandKind(*A);
+
+ if (Opts.Command == CommandKind::Invalid)
+ return createStringError("no command action is specified");
+
+ for (auto *File : Args.filtered(OPT_INPUT))
+ Opts.Inputs.push_back(File->getValue());
+ Opts.CASPath = Args.getLastArgValue(OPT_cas_path);
+ Opts.UpstreamCASPath = Args.getLastArgValue(OPT_upstream_cas);
+ Opts.DataPath = Args.getLastArgValue(OPT_data);
+ Opts.CheckHash = Args.hasArg(OPT_check_hash);
+ Opts.AllowRecovery = Args.hasArg(OPT_allow_recovery);
+ Opts.Force = Args.hasArg(OPT_force);
+ Opts.InProcess = Args.hasArg(OPT_in_process);
+
+ // Validate options.
+ if (Opts.CASPath.empty())
+ return createStringError("missing --cas <path>");
+
+ if (Opts.Inputs.empty() && CommandOptions::requiresInput(Opts.Command))
+ return createStringError("missing <input> to operate on");
+
+ return Opts;
+}
+
+int main(int Argc, char **Argv) {
+ InitLLVM X(Argc, Argv);
+
+ ExitOnError ExitOnErr;
+ auto Opts = ExitOnErr(parseOptions(Argc, Argv));
+
+ if (Opts.Command == CommandKind::ValidateIfNeeded)
+ return validateIfNeeded(Opts.CASPath, Opts.CheckHash, Opts.Force,
+ Opts.AllowRecovery, Opts.InProcess, Argv[0]);
+
+ auto [CAS, AC] = ExitOnErr(createOnDiskUnifiedCASDatabases(Opts.CASPath));
+ assert(CAS);
+
+ if (Opts.Command == CommandKind::Dump)
+ return dump(*CAS);
+
+ if (Opts.Command == CommandKind::Validate)
+ return validate(*CAS, *AC, Opts.CheckHash);
+
+ if (Opts.Command == CommandKind::MakeBlob)
+ return makeBlob(*CAS, Opts.DataPath);
+
+ if (Opts.Command == CommandKind::MakeNode)
+ return makeNode(*CAS, Opts.Inputs, Opts.DataPath);
+
+ if (Opts.Command == CommandKind::Prune)
+ return prune(*CAS);
+
+ if (Opts.Command == CommandKind::Import) {
+ if (Opts.UpstreamCASPath.empty())
+ ExitOnErr(createStringError("missing '-upstream-cas'"));
+
+ auto [UpstreamCAS, _] =
+ ExitOnErr(createOnDiskUnifiedCASDatabases(Opts.UpstreamCASPath));
+ return import(*UpstreamCAS, *CAS, Opts.Inputs);
+ }
+
+ if (Opts.Command == CommandKind::PutCacheKey ||
+ Opts.Command == CommandKind::GetCacheResult) {
+ if (!AC)
+ ExitOnErr(createStringError("no action-cache available"));
+ }
+
+ if (Opts.Command == CommandKind::PutCacheKey)
+ return putCacheKey(*CAS, *AC, Opts.Inputs);
+
+ // Remaining commands need exactly one CAS object.
+ if (Opts.Inputs.size() > 1)
+ ExitOnErr(createStringError("too many <object>s, expected 1"));
+ CASID ID = ExitOnErr(CAS->parseID(Opts.Inputs.front()));
+
+ if (Opts.Command == CommandKind::GetCacheResult)
+ return getCacheResult(*CAS, *AC, ID);
+
+ if (Opts.Command == CommandKind::ListObjectReferences)
+ return listObjectReferences(*CAS, ID);
+
+ if (Opts.Command == CommandKind::CatNodeData)
+ return catNodeData(*CAS, ID);
+
+ assert(Opts.Command == CommandKind::ValidateObject);
+ return validateObject(*CAS, ID);
+}
+
+static Expected<std::unique_ptr<MemoryBuffer>> openBuffer(StringRef DataPath) {
+ if (DataPath.empty())
+ return createStringError("--data missing");
+ return errorOrToExpected(DataPath == "-"
+ ? llvm::MemoryBuffer::getSTDIN()
+ : llvm::MemoryBuffer::getFile(DataPath));
+}
+
+int dump(ObjectStore &CAS) {
+ ExitOnError ExitOnErr("llvm-cas: dump: ");
+ CAS.print(llvm::outs());
+ return 0;
+}
+
+int makeBlob(ObjectStore &CAS, StringRef DataPath) {
+ ExitOnError ExitOnErr("llvm-cas: make-blob: ");
+ std::unique_ptr<MemoryBuffer> Buffer = ExitOnErr(openBuffer(DataPath));
+
+ ObjectProxy Blob = ExitOnErr(CAS.createProxy({}, Buffer->getBuffer()));
+ llvm::outs() << Blob.getID() << "\n";
+ return 0;
+}
+
+int catNodeData(ObjectStore &CAS, const CASID &ID) {
+ ExitOnError ExitOnErr("llvm-cas: cat-node-data: ");
+ llvm::outs() << ExitOnErr(CAS.getProxy(ID)).getData();
+ return 0;
+}
+
+int listObjectReferences(ObjectStore &CAS, const CASID &ID) {
+ ExitOnError ExitOnErr("llvm-cas: ls-node-refs: ");
+
+ ObjectProxy Object = ExitOnErr(CAS.getProxy(ID));
+ ExitOnErr(Object.forEachReference([&](ObjectRef Ref) -> Error {
+ llvm::outs() << CAS.getID(Ref) << "\n";
+ return Error::success();
+ }));
+
+ return 0;
+}
+
+static int makeNode(ObjectStore &CAS, ArrayRef<std::string> Objects,
+ StringRef DataPath) {
+ std::unique_ptr<MemoryBuffer> Data =
+ ExitOnError("llvm-cas: make-node: data: ")(openBuffer(DataPath));
+
+ SmallVector<ObjectRef> IDs;
+ for (StringRef Object : Objects) {
+ ExitOnError ObjectErr("llvm-cas: make-node: ref: ");
+ std::optional<ObjectRef> ID =
+ CAS.getReference(ObjectErr(CAS.parseID(Object)));
+ if (!ID)
+ ObjectErr(createStringError("unknown object '" + Object + "'"));
+ IDs.push_back(*ID);
+ }
+
+ ExitOnError ExitOnErr("llvm-cas: make-node: ");
+ ObjectProxy Object = ExitOnErr(CAS.createProxy(IDs, Data->getBuffer()));
+ llvm::outs() << Object.getID() << "\n";
+ return 0;
+}
+
+static int import(ObjectStore &FromCAS, ObjectStore &ToCAS,
+ ArrayRef<std::string> Objects) {
+ ExitOnError ExitOnErr("llvm-cas: import: ");
+
+ for (StringRef Object : Objects) {
+ CASID ID = ExitOnErr(FromCAS.parseID(Object));
+ auto Ref = FromCAS.getReference(ID);
+ if (!Ref)
+ ExitOnErr(createStringError("input not found: " + ID.toString()));
+
+ auto Imported = ExitOnErr(ToCAS.importObject(FromCAS, *Ref));
+ llvm::outs() << ToCAS.getID(Imported).toString() << "\n";
+ }
+ return 0;
+}
+
+static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
+ ArrayRef<std::string> Objects) {
+ ExitOnError ExitOnErr("llvm-cas: put-cache-key: ");
+
+ if (Objects.size() % 2 != 0)
+ ExitOnErr(createStringError("expected pairs of inputs"));
+ while (!Objects.empty()) {
+ CASID Key = ExitOnErr(CAS.parseID(Objects[0]));
+ CASID Result = ExitOnErr(CAS.parseID(Objects[1]));
+ Objects = Objects.drop_front(2);
+ ExitOnErr(AC.put(Key, Result));
+ }
+ return 0;
+}
+
+static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID) {
+ ExitOnError ExitOnErr("llvm-cas: get-cache-result: ");
+
+ auto Result = ExitOnErr(AC.get(ID));
+ if (!Result) {
+ outs() << "result not found\n";
+ return 1;
+ }
+ outs() << *Result << "\n";
+ return 0;
+}
+
+int validateObject(ObjectStore &CAS, const CASID &ID) {
+ ExitOnError ExitOnErr("llvm-cas: validate-object: ");
+ ExitOnErr(CAS.validateObject(ID));
+ outs() << ID << ": validated successfully\n";
+ return 0;
+}
+
+int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash) {
+ ExitOnError ExitOnErr("llvm-cas: validate: ");
+ ExitOnErr(CAS.validate(CheckHash));
+ ExitOnErr(AC.validate());
+ outs() << "validated successfully\n";
+ return 0;
+}
+
+int validateIfNeeded(StringRef Path, bool CheckHash, bool Force,
+ bool AllowRecovery, bool InProcess, const char *Argv0) {
+ ExitOnError ExitOnErr("llvm-cas: validate-if-needed: ");
+ std::string ExecStorage;
+ std::optional<StringRef> Exec;
+ if (!InProcess) {
+ ExecStorage = sys::fs::getMainExecutable(Argv0, (void *)validateIfNeeded);
+ Exec = ExecStorage;
+ }
+ ValidationResult Result = ExitOnErr(validateOnDiskUnifiedCASDatabasesIfNeeded(
+ Path, CheckHash, AllowRecovery, Force, Exec));
+ switch (Result) {
+ case ValidationResult::Valid:
+ outs() << "validated successfully\n";
+ break;
+ case ValidationResult::Recovered:
+ outs() << "recovered from invalid data\n";
+ break;
+ case ValidationResult::Skipped:
+ outs() << "validation skipped\n";
+ break;
+ }
+ return 0;
+}
+
+static int prune(cas::ObjectStore &CAS) {
+ ExitOnError ExitOnErr("llvm-cas: prune: ");
+ ExitOnErr(CAS.pruneStorageData());
+ return 0;
+}
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 972dac82d333..1142c559c97f 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -2657,4 +2657,31 @@ TEST_F(PatternMatchTest, ShiftOrSelf) {
EXPECT_EQ(ShAmtC, 0U);
}
+TEST_F(PatternMatchTest, CommutativeDeferredIntrinsicMatch) {
+ Value *X = ConstantFP::get(IRB.getDoubleTy(), 1.0);
+ Value *Y = ConstantFP::get(IRB.getDoubleTy(), 2.0);
+
+ auto CheckMatch = [X, Y](Value *Pattern) {
+ Value *tX = nullptr, *tY = nullptr;
+ EXPECT_TRUE(
+ match(Pattern, m_c_Intrinsic<Intrinsic::minimum>(
+ m_Value(tX), m_c_Intrinsic<Intrinsic::minimum>(
+ m_Deferred(tX), m_Value(tY)))));
+ EXPECT_EQ(tX, X);
+ EXPECT_EQ(tY, Y);
+ };
+ CheckMatch(IRB.CreateBinaryIntrinsic(
+ Intrinsic::minimum, X,
+ IRB.CreateBinaryIntrinsic(Intrinsic::minimum, X, Y)));
+ CheckMatch(IRB.CreateBinaryIntrinsic(
+ Intrinsic::minimum, X,
+ IRB.CreateBinaryIntrinsic(Intrinsic::minimum, Y, X)));
+ CheckMatch(IRB.CreateBinaryIntrinsic(
+ Intrinsic::minimum, IRB.CreateBinaryIntrinsic(Intrinsic::minimum, X, Y),
+ X));
+ CheckMatch(IRB.CreateBinaryIntrinsic(
+ Intrinsic::minimum, IRB.CreateBinaryIntrinsic(Intrinsic::minimum, Y, X),
+ X));
+}
+
} // anonymous namespace.
diff --git a/llvm/unittests/Option/OptionSubCommandsTest.cpp b/llvm/unittests/Option/OptionSubCommandsTest.cpp
index e31a3262f135..d4744c90b006 100644
--- a/llvm/unittests/Option/OptionSubCommandsTest.cpp
+++ b/llvm/unittests/Option/OptionSubCommandsTest.cpp
@@ -192,6 +192,19 @@ TYPED_TEST(OptSubCommandTableTest, SubCommandParsing) {
std::string::npos,
ErrMsg.find("Option [lowercase] is not valid for SubCommand [bar]"));
}
+
+ {
+ // Test case 7: Check valid use of a valid subcommand following more
+ // positional arguments.
+ const char *Args[] = {"bar", "input"};
+ InputArgList AL = T.ParseArgs(Args, MAI, MAC);
+ StringRef SC = AL.getSubCommand(
+ T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals);
+ EXPECT_EQ(SC, "bar"); // valid subcommand
+ EXPECT_NE(std::string::npos,
+ ErrMsg.find("Unregistered positionals passed"));
+ EXPECT_NE(std::string::npos, ErrMsg.find("input"));
+ }
}
TYPED_TEST(OptSubCommandTableTest, SubCommandHelp) {
diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
index aa7f8744e141..b5268c82e419 100644
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -183,6 +183,20 @@ TYPED_TEST(ThreadPoolTest, Async) {
ASSERT_EQ(2, i.load());
}
+TYPED_TEST(ThreadPoolTest, AsyncMoveOnly) {
+ CHECK_UNSUPPORTED();
+ DefaultThreadPool Pool;
+ std::promise<int> p;
+ std::future<int> f = p.get_future();
+ Pool.async([this, p = std::move(p)]() mutable {
+ this->waitForMainThread();
+ p.set_value(42);
+ });
+ this->setMainThreadReady();
+ Pool.wait();
+ ASSERT_EQ(42, f.get());
+}
+
TYPED_TEST(ThreadPoolTest, GetFuture) {
CHECK_UNSUPPORTED();
DefaultThreadPool Pool(hardware_concurrency(2));
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index c256eae5dcdc..82ecc16074a8 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -969,16 +969,40 @@ compound=true
#endif
using VPRecipeTest = VPlanTestBase;
+
+namespace {
+template <typename RecipeT, typename T, typename... Rest>
+void checkVPRecipeCastImpl(RecipeT *R) {
+ // Direct checks on recipe pointer
+ EXPECT_TRUE(isa<T>(R));
+ EXPECT_EQ(R, dyn_cast<T>(R));
+ (void)cast<T>(R); // Verify cast succeeds (asserts on failure)
+
+ // Check through base pointer
+ VPRecipeBase *BaseR = R;
+ EXPECT_TRUE(isa<T>(BaseR));
+ EXPECT_EQ(R, dyn_cast<T>(BaseR));
+ (void)cast<T>(BaseR);
+
+ // Check through const base pointer
+ const VPRecipeBase *ConstBaseR = R;
+ EXPECT_TRUE(isa<T>(ConstBaseR));
+ EXPECT_EQ(R, dyn_cast<T>(ConstBaseR));
+ (void)cast<T>(ConstBaseR);
+
+ if constexpr (sizeof...(Rest) > 0)
+ checkVPRecipeCastImpl<RecipeT, Rest...>(R);
+}
+} // namespace
+
TEST_F(VPRecipeTest, CastVPInstructionToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
VPlan &Plan = getPlan();
VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPInstruction Recipe(Instruction::Add, {Op1, Op2});
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPInstruction, VPUser>(&Recipe);
}
TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) {
@@ -992,10 +1016,8 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) {
Args.push_back(Op1);
Args.push_back(Op2);
VPWidenRecipe WidenR(*AI, make_range(Args.begin(), Args.end()));
- EXPECT_TRUE(isa<VPUser>(&WidenR));
- VPRecipeBase *WidenRBase = &WidenR;
- EXPECT_TRUE(isa<VPUser>(WidenRBase));
- EXPECT_EQ(&WidenR, WidenRBase);
+
+ checkVPRecipeCastImpl<VPWidenRecipe, VPUser>(&WidenR);
delete AI;
}
@@ -1013,10 +1035,8 @@ TEST_F(VPRecipeTest, CastVPWidenCallRecipeToVPUserAndVPDef) {
Args.push_back(Op2);
Args.push_back(CalledFn);
VPWidenCallRecipe Recipe(Call, Fn, Args);
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPWidenCallRecipe, VPUser>(&Recipe);
VPValue *VPV = &Recipe;
EXPECT_TRUE(VPV->getDefiningRecipe());
@@ -1041,13 +1061,10 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) {
Args.push_back(Op3);
VPWidenSelectRecipe WidenSelectR(*SelectI,
make_range(Args.begin(), Args.end()));
- EXPECT_TRUE(isa<VPUser>(&WidenSelectR));
- VPRecipeBase *BaseR = &WidenSelectR;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&WidenSelectR, BaseR);
+
+ checkVPRecipeCastImpl<VPWidenSelectRecipe, VPUser>(&WidenSelectR);
VPValue *VPV = &WidenSelectR;
- EXPECT_TRUE(isa<VPRecipeBase>(VPV->getDefiningRecipe()));
EXPECT_EQ(&WidenSelectR, VPV->getDefiningRecipe());
delete SelectI;
@@ -1065,10 +1082,8 @@ TEST_F(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) {
Args.push_back(Op1);
Args.push_back(Op2);
VPWidenGEPRecipe Recipe(GEP, make_range(Args.begin(), Args.end()));
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPWidenGEPRecipe, VPUser>(&Recipe);
VPValue *VPV = &Recipe;
EXPECT_TRUE(isa<VPRecipeBase>(VPV->getDefiningRecipe()));
@@ -1077,6 +1092,28 @@ TEST_F(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) {
delete GEP;
}
+TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ IntegerType *Int64 = IntegerType::get(C, 64);
+ auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64);
+ VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast);
+
+ checkVPRecipeCastImpl<VPWidenCastRecipe, VPUser>(&Recipe);
+ delete Cast;
+}
+
+TEST_F(VPRecipeTest, CastVPWidenIntrinsicRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPWidenIntrinsicRecipe Recipe(Intrinsic::smax, {Op1, Op2}, Int32);
+
+ checkVPRecipeCastImpl<VPWidenIntrinsicRecipe, VPUser>(&Recipe);
+}
+
TEST_F(VPRecipeTest, CastVPBlendRecipeToVPUser) {
VPlan &Plan = getPlan();
IntegerType *Int32 = IntegerType::get(C, 32);
@@ -1090,9 +1127,9 @@ TEST_F(VPRecipeTest, CastVPBlendRecipeToVPUser) {
Args.push_back(I2);
Args.push_back(M2);
VPBlendRecipe Recipe(Phi, Args, {});
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
+
+ checkVPRecipeCastImpl<VPBlendRecipe, VPUser>(&Recipe);
+
delete Phi;
}
@@ -1103,10 +1140,8 @@ TEST_F(VPRecipeTest, CastVPInterleaveRecipeToVPUser) {
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
InterleaveGroup<Instruction> IG(4, false, Align(4));
VPInterleaveRecipe Recipe(&IG, Addr, {}, Mask, false, {}, DebugLoc());
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPInterleaveRecipe, VPUser>(&Recipe);
}
TEST_F(VPRecipeTest, CastVPReplicateRecipeToVPUser) {
@@ -1121,9 +1156,9 @@ TEST_F(VPRecipeTest, CastVPReplicateRecipeToVPUser) {
FunctionType *FTy = FunctionType::get(Int32, false);
auto *Call = CallInst::Create(FTy, PoisonValue::get(FTy));
VPReplicateRecipe Recipe(Call, make_range(Args.begin(), Args.end()), true);
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
+
+ checkVPRecipeCastImpl<VPReplicateRecipe, VPUser>(&Recipe);
+
delete Call;
}
@@ -1132,10 +1167,8 @@ TEST_F(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPBranchOnMaskRecipe Recipe(Mask, {});
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPBranchOnMaskRecipe, VPUser>(&Recipe);
}
TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
@@ -1147,10 +1180,8 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {});
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
- EXPECT_EQ(&Recipe, BaseR);
+
+ checkVPRecipeCastImpl<VPWidenLoadRecipe, VPUser>(&Recipe);
VPValue *VPV = Recipe.getVPSingleValue();
EXPECT_TRUE(isa<VPRecipeBase>(VPV->getDefiningRecipe()));
@@ -1159,6 +1190,71 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
delete Load;
}
+TEST_F(VPRecipeTest, CastVPInterleaveEVLRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 8));
+ InterleaveGroup<Instruction> IG(4, false, Align(4));
+ VPInterleaveRecipe BaseRecipe(&IG, Addr, {}, Mask, false, {}, DebugLoc());
+ VPInterleaveEVLRecipe Recipe(BaseRecipe, *EVL, Mask);
+
+ checkVPRecipeCastImpl<VPInterleaveEVLRecipe, VPUser>(&Recipe);
+}
+
+TEST_F(VPRecipeTest, CastVPWidenLoadEVLRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ PointerType *Int32Ptr = PointerType::get(C, 0);
+ auto *Load =
+ new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
+ VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 8));
+ VPWidenLoadRecipe BaseLoad(*Load, Addr, Mask, true, false, {}, {});
+ VPWidenLoadEVLRecipe Recipe(BaseLoad, Addr, *EVL, Mask);
+
+ checkVPRecipeCastImpl<VPWidenLoadEVLRecipe, VPUser>(&Recipe);
+
+ delete Load;
+}
+
+TEST_F(VPRecipeTest, CastVPWidenStoreRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ PointerType *Int32Ptr = PointerType::get(C, 0);
+ auto *Store = new StoreInst(PoisonValue::get(Int32),
+ PoisonValue::get(Int32Ptr), false, Align(1));
+ VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPValue *StoredVal = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 42));
+ VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPWidenStoreRecipe Recipe(*Store, Addr, StoredVal, Mask, true, false, {}, {});
+
+ checkVPRecipeCastImpl<VPWidenStoreRecipe, VPUser>(&Recipe);
+
+ delete Store;
+}
+
+TEST_F(VPRecipeTest, CastVPWidenStoreEVLRecipeToVPUser) {
+ VPlan &Plan = getPlan();
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ PointerType *Int32Ptr = PointerType::get(C, 0);
+ auto *Store = new StoreInst(PoisonValue::get(Int32),
+ PoisonValue::get(Int32Ptr), false, Align(1));
+ VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
+ VPValue *StoredVal = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 42));
+ VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 8));
+ VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
+ VPWidenStoreRecipe BaseStore(*Store, Addr, StoredVal, Mask, true, false, {},
+ {});
+ VPWidenStoreEVLRecipe Recipe(BaseStore, Addr, *EVL, Mask);
+
+ checkVPRecipeCastImpl<VPWidenStoreEVLRecipe, VPUser>(&Recipe);
+
+ delete Store;
+}
+
TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
IntegerType *Int1 = IntegerType::get(C, 1);
IntegerType *Int32 = IntegerType::get(C, 32);
@@ -1606,9 +1702,7 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
- EXPECT_TRUE(isa<VPUser>(&Recipe));
- VPRecipeBase *BaseR = &Recipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
+ checkVPRecipeCastImpl<VPReductionRecipe, VPUser>(&Recipe);
delete Add;
}
@@ -1623,9 +1717,7 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
CondOp, VecOp, false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
- EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
- VPRecipeBase *BaseR = &EVLRecipe;
- EXPECT_TRUE(isa<VPUser>(BaseR));
+ checkVPRecipeCastImpl<VPReductionEVLRecipe, VPUser>(&EVLRecipe);
delete Add;
}
} // namespace
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 9e0b9513a9a1..27bd2ce9849f 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -336,6 +336,7 @@ if (current_toolchain == default_toolchain) {
"__chrono/gps_clock.h",
"__chrono/hh_mm_ss.h",
"__chrono/high_resolution_clock.h",
+ "__chrono/is_clock.h",
"__chrono/leap_second.h",
"__chrono/literals.h",
"__chrono/local_info.h",
diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
index 783eb9628359..679373d74166 100644
--- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
@@ -72,6 +72,7 @@ static_library("Target") {
"Statistics.cpp",
"StopInfo.cpp",
"StructuredDataPlugin.cpp",
+ "SyntheticFrameProvider.cpp",
"SystemRuntime.cpp",
"Target.cpp",
"TargetList.cpp",
diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake
index fa6aec8a603a..8196e2a2a332 100644
--- a/mlir/cmake/modules/AddMLIRPython.cmake
+++ b/mlir/cmake/modules/AddMLIRPython.cmake
@@ -791,7 +791,6 @@ function(add_mlir_python_extension libname extname)
get_property(NB_LIBRARY_TARGET_NAME TARGET ${libname} PROPERTY LINK_LIBRARIES)
target_compile_options(${NB_LIBRARY_TARGET_NAME}
PRIVATE
- -Wall -Wextra -Wpedantic
-Wno-c++98-compat-extra-semi
-Wno-cast-qual
-Wno-covered-switch-default
@@ -799,11 +798,11 @@ function(add_mlir_python_extension libname extname)
-Wno-nested-anon-types
-Wno-unused-parameter
-Wno-zero-length-array
+ -Wno-missing-field-initializers
${eh_rtti_enable})
target_compile_options(${libname}
PRIVATE
- -Wall -Wextra -Wpedantic
-Wno-c++98-compat-extra-semi
-Wno-cast-qual
-Wno-covered-switch-default
@@ -811,6 +810,7 @@ function(add_mlir_python_extension libname extname)
-Wno-nested-anon-types
-Wno-unused-parameter
-Wno-zero-length-array
+ -Wno-missing-field-initializers
${eh_rtti_enable})
endif()
diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
index cad6cec761ab..e7ab63abfeaa 100644
--- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
+++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
@@ -86,7 +86,8 @@ private:
/// ArrayRef<NamedAttribute>.
template <typename SourceOp, typename TargetOp,
template <typename, typename> typename AttrConvert =
- AttrConvertPassThrough>
+ AttrConvertPassThrough,
+ bool FailOnUnsupportedFP = false>
class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> {
public:
using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
@@ -123,11 +124,13 @@ public:
"unsupported floating point type");
return success();
};
- for (Value operand : op->getOperands())
- if (failed(checkType(operand)))
+ if (FailOnUnsupportedFP) {
+ for (Value operand : op->getOperands())
+ if (failed(checkType(operand)))
+ return failure();
+ if (failed(checkType(op->getResult(0))))
return failure();
- if (failed(checkType(op->getResult(0))))
- return failure();
+ }
// Determine attributes for the target op
AttrConvert<SourceOp, TargetOp> attrConvert(op);
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 80bc0e5986e5..1cc5b74a3cb6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1236,6 +1236,23 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
let hasVerifier = 1;
}
+def NVVM_MembarOp : NVVM_Op<"memory.barrier">,
+ Arguments<(ins MemScopeKindAttr:$scope)> {
+ let summary = "Memory barrier operation";
+ let description = [{
+ `membar` operation guarantees that prior memory accesses requested by this
+ thread are performed at the specified `scope`, before later memory
+ operations requested by this thread following the membar instruction.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
+ }];
+
+ let assemblyFormat = "$scope attr-dict";
+ let llvmBuilder = [{
+ createIntrinsicCall(builder, getMembarIntrinsicID($scope), {});
+ }];
+}
+
def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
Arguments<(ins MemScopeKindAttr:$scope,
DefaultValuedAttr<ProxyKindAttr,
diff --git a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td
index 2dd612139fa2..388efaaa2511 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td
@@ -463,10 +463,9 @@ def XeVM_PrefetchOp
def XeVM_BlockPrefetch2dOp
: XeVM_Op<"blockprefetch2d">,
- Arguments<(ins Arg<LLVM_AnyPointer, "", [MemRead]>:$ptr, I32:$base_width,
- I32:$base_height, I32:$base_pitch, I32:$x, I32:$y,
- I32Attr:$elem_size_in_bits, I32Attr:$tile_width, I32Attr:$tile_height,
- I32Attr:$v_blocks,
+ Arguments<(ins LLVM_AnyPointer:$ptr, I32:$base_width, I32:$base_height,
+ I32:$base_pitch, I32:$x, I32:$y, I32Attr:$elem_size_in_bits,
+ I32Attr:$tile_width, I32Attr:$tile_height, I32Attr:$v_blocks,
OptionalAttr<XeVM_LoadCacheControlAttr>:$cache_control)> {
let summary = "2D block prefetch";
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index ecd036d452b2..dfb32a056a4d 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -235,8 +235,7 @@ def TensorOrMemref :
def MapOp : LinalgStructuredBase_Op<"map", [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
- DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmBlockArgumentNames"]>,
- SingleBlockImplicitTerminator<"YieldOp">]> {
+ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmBlockArgumentNames"]>]> {
let summary = "Elementwise operations";
let description = [{
Models elementwise operations on tensors in terms of arithmetic operations
@@ -318,8 +317,7 @@ def MapOp : LinalgStructuredBase_Op<"map", [
def ReduceOp : LinalgStructuredBase_Op<"reduce", [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmBlockArgumentNames"]>,
- SameVariadicOperandSize,
- SingleBlockImplicitTerminator<"YieldOp">]> {
+ SameVariadicOperandSize]> {
let summary = "Reduce operator";
let description = [{
Executes `combiner` on the `dimensions` of `inputs` and returns the
@@ -400,8 +398,7 @@ def ReduceOp : LinalgStructuredBase_Op<"reduce", [
//===----------------------------------------------------------------------===//
def TransposeOp : LinalgStructuredBase_Op<"transpose", [
- DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
- SingleBlockImplicitTerminator<"YieldOp">]> {
+ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
let summary = "Transpose operator";
let description = [{
Permutes the dimensions of `input` according to the given `permutation`.
@@ -477,8 +474,7 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [
//===----------------------------------------------------------------------===//
def BroadcastOp : LinalgStructuredBase_Op<"broadcast", [
- DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
- SingleBlockImplicitTerminator<"YieldOp">]> {
+ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
let summary = "Static broadcast operator";
let description = [{
Broadcast the input into the given shape by adding `dimensions`.
@@ -547,8 +543,9 @@ def BroadcastOp : LinalgStructuredBase_Op<"broadcast", [
//===----------------------------------------------------------------------===//
// Op definition for ElementwiseOp
//===----------------------------------------------------------------------===//
+
def ElementwiseOp : LinalgStructuredBase_Op<"elementwise", [
- AttrSizedOperandSegments]> {
+ AttrSizedOperandSegments]> {
let summary = [{ Performs element-wise operation }];
let description = [{
The attribute `kind` describes arithmetic operation to perform. The
@@ -684,7 +681,6 @@ def ElementwiseOp : LinalgStructuredBase_Op<"elementwise", [
def MatmulOp : LinalgStructuredBase_Op<"matmul", [
AttrSizedOperandSegments,
LinalgContractionOpInterface]> {
-
let summary = [{
Performs a matrix multiplication of two 2D inputs without broadcast or transpose.
}];
@@ -816,8 +812,8 @@ def MatmulOp : LinalgStructuredBase_Op<"matmul", [
//===----------------------------------------------------------------------===//
def ContractOp : LinalgStructuredBase_Op<"contract", [
- AttrSizedOperandSegments,
- LinalgContractionOpInterface]> {
+ AttrSizedOperandSegments,
+ LinalgContractionOpInterface]> {
let summary = [{
Perform a contraction on two inputs, accumulating into the third.
}];
@@ -954,9 +950,9 @@ def ContractOp : LinalgStructuredBase_Op<"contract", [
// Op definition for BatchMatmulOp
//===----------------------------------------------------------------------===//
-def BatchMatmulOp : LinalgStructuredBase_Op<"batch_matmul", !listconcat([AttrSizedOperandSegments],
- /*extraInterfaces=*/[LinalgContractionOpInterface])> {
-
+def BatchMatmulOp : LinalgStructuredBase_Op<"batch_matmul", [
+ AttrSizedOperandSegments,
+ LinalgContractionOpInterface]> {
let summary = [{Performs a batched matrix multiplication of two 3D inputs.}];
let description = [{Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
@@ -1087,7 +1083,6 @@ def BatchMatmulOp : LinalgStructuredBase_Op<"batch_matmul", !listconcat([AttrSiz
def BatchReduceMatmulOp : LinalgStructuredBase_Op<"batch_reduce_matmul", [
AttrSizedOperandSegments,
LinalgContractionOpInterface]> {
-
let summary = [{Performs a batch-reduce matrix multiplication on two inputs.
The partial multiplication results are reduced into a 2D output.}];
let description = [{
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
index 69447f74ec40..b7abcdea10a2 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
@@ -13,6 +13,7 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/IR/Dialect.h"
+#include "mlir/Interfaces/AlignmentAttrInterface.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/CastInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index e00f3c152600..8965302a58c5 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -11,6 +11,7 @@
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/MemRef/IR/MemRefBase.td"
+include "mlir/Interfaces/AlignmentAttrInterface.td"
include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
@@ -65,15 +66,15 @@ class AllocLikeOp<string mnemonic,
list<Trait> traits = []> :
MemRef_Op<mnemonic,
!listconcat([
- AttrSizedOperandSegments
+ AttrSizedOperandSegments,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
], traits)> {
let arguments = (ins Variadic<Index>:$dynamicSizes,
// The symbolic operands (the ones in square brackets)
// bind to the symbols of the memref's layout map.
Variadic<Index>:$symbolOperands,
- ConfinedAttr<OptionalAttr<I64Attr>,
- [IntMinValue<0>]>:$alignment);
+ OptionalAttr<IntValidAlignment<I64Attr>>:$alignment);
let results = (outs Res<AnyMemRef, "",
[MemAlloc<resource, 0, FullEffect>]>:$memref);
@@ -269,7 +270,8 @@ def MemRef_AllocOp : AllocLikeOp<"alloc", DefaultResource, [
//===----------------------------------------------------------------------===//
-def MemRef_ReallocOp : MemRef_Op<"realloc"> {
+def MemRef_ReallocOp : MemRef_Op<"realloc",
+ [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "memory reallocation operation";
let description = [{
The `realloc` operation changes the size of a memory region. The memory
@@ -335,8 +337,7 @@ def MemRef_ReallocOp : MemRef_Op<"realloc"> {
let arguments = (ins Arg<MemRefRankOf<[AnyType], [1]>, "",
[MemFreeAt<0, FullEffect>]>:$source,
Optional<Index>:$dynamicResultSize,
- ConfinedAttr<OptionalAttr<I64Attr>,
- [IntMinValue<0>]>:$alignment);
+ OptionalAttr<IntValidAlignment<I64Attr>>:$alignment);
let results = (outs Res<MemRefRankOf<[AnyType], [1]>, "",
[MemAlloc<DefaultResource, 1,
@@ -1160,7 +1161,8 @@ def MemRef_GetGlobalOp : MemRef_Op<"get_global",
// GlobalOp
//===----------------------------------------------------------------------===//
-def MemRef_GlobalOp : MemRef_Op<"global", [Symbol]> {
+def MemRef_GlobalOp : MemRef_Op<"global", [Symbol,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "declare or define a global memref variable";
let description = [{
The `memref.global` operation declares or defines a named global memref
@@ -1235,6 +1237,7 @@ def LoadOp : MemRef_Op<"load",
"memref", "result",
"::llvm::cast<MemRefType>($_self).getElementType()">,
MemRefsNormalizable,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>,
DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
DeclareOpInterfaceMethods<PromotableMemOpInterface>,
DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>]> {
@@ -2010,6 +2013,7 @@ def MemRef_StoreOp : MemRef_Op<"store",
"memref", "value",
"::llvm::cast<MemRefType>($_self).getElementType()">,
MemRefsNormalizable,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>,
DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
DeclareOpInterfaceMethods<PromotableMemOpInterface>,
DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>]> {
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c689b7e46ea9..5b89f741e296 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2184,6 +2184,8 @@ def OpenACC_KernelEnvironmentOp : OpenACC_Op<"kernel_environment",
)
$region attr-dict
}];
+
+ let hasCanonicalizer = 1;
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td
index 827ac901d22d..e8124b8b0bed 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td
@@ -16,6 +16,8 @@
#ifndef MLIR_DIALECT_SPIRV_IR_COOPERATIVE_MATRIX_OPS
#define MLIR_DIALECT_SPIRV_IR_COOPERATIVE_MATRIX_OPS
+include "mlir/Interfaces/AlignmentAttrInterface.td"
+
//===----------------------------------------------------------------------===//
// SPV_KHR_cooperative_matrix extension ops.
//===----------------------------------------------------------------------===//
@@ -62,7 +64,7 @@ def SPIRV_KHRCooperativeMatrixLengthOp :
// -----
-def SPIRV_KHRCooperativeMatrixLoadOp : SPIRV_KhrVendorOp<"CooperativeMatrixLoad", []> {
+def SPIRV_KHRCooperativeMatrixLoadOp : SPIRV_KhrVendorOp<"CooperativeMatrixLoad", [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "Loads a cooperative matrix through a pointer";
let description = [{
@@ -148,7 +150,7 @@ def SPIRV_KHRCooperativeMatrixLoadOp : SPIRV_KhrVendorOp<"CooperativeMatrixLoad"
// -----
-def SPIRV_KHRCooperativeMatrixStoreOp : SPIRV_KhrVendorOp<"CooperativeMatrixStore", []> {
+def SPIRV_KHRCooperativeMatrixStoreOp : SPIRV_KhrVendorOp<"CooperativeMatrixStore", [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "Stores a cooperative matrix through a pointer";
let description = [{
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td
index 6108decdb970..0b3d70f80bed 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMemoryOps.td
@@ -15,6 +15,8 @@
#define MLIR_DIALECT_SPIRV_IR_MEMORY_OPS
include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
+include "mlir/Interfaces/AlignmentAttrInterface.td"
+
// -----
@@ -79,7 +81,7 @@ def SPIRV_AccessChainOp : SPIRV_Op<"AccessChain", [Pure]> {
// -----
-def SPIRV_CopyMemoryOp : SPIRV_Op<"CopyMemory", []> {
+def SPIRV_CopyMemoryOp : SPIRV_Op<"CopyMemory", [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = [{
Copy from the memory pointed to by Source to the memory pointed to by
Target. Both operands must be non-void pointers and having the same <id>
@@ -182,7 +184,7 @@ def SPIRV_InBoundsPtrAccessChainOp : SPIRV_Op<"InBoundsPtrAccessChain", [Pure]>
// -----
-def SPIRV_LoadOp : SPIRV_Op<"Load", []> {
+def SPIRV_LoadOp : SPIRV_Op<"Load", [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "Load through a pointer.";
let description = [{
@@ -310,7 +312,7 @@ def SPIRV_PtrAccessChainOp : SPIRV_Op<"PtrAccessChain", [Pure]> {
// -----
-def SPIRV_StoreOp : SPIRV_Op<"Store", []> {
+def SPIRV_StoreOp : SPIRV_Op<"Store", [DeclareOpInterfaceMethods<AlignmentAttrOpInterface>]> {
let summary = "Store through a pointer.";
let description = [{
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h
index 2676e921c73f..0e1f6e79a367 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h
@@ -20,6 +20,7 @@
#include "mlir/Dialect/SPIRV/Interfaces/SPIRVImageInterfaces.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/OpImplementation.h"
+#include "mlir/Interfaces/AlignmentAttrInterface.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
index bbf55f5d507e..b3a0653b9076 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
@@ -23,6 +23,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
+#include "mlir/Interfaces/AlignmentAttrInterface.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "mlir/Interfaces/IndexingMapOpInterface.h"
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 6e15b1e7df60..43172ff2082d 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -19,6 +19,7 @@ include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.td"
include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.td"
include "mlir/Dialect/Vector/IR/Vector.td"
include "mlir/Dialect/Vector/IR/VectorAttributes.td"
+include "mlir/Interfaces/AlignmentAttrInterface.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/DestinationStyleOpInterface.td"
include "mlir/Interfaces/IndexingMapOpInterface.td"
@@ -1653,7 +1654,8 @@ def Vector_TransferWriteOp :
def Vector_LoadOp : Vector_Op<"load", [
DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
- DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
]> {
let summary = "reads an n-D slice of memory into an n-D vector";
let description = [{
@@ -1770,7 +1772,8 @@ def Vector_LoadOp : Vector_Op<"load", [
def Vector_StoreOp : Vector_Op<"store", [
DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
- DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
]> {
let summary = "writes an n-D vector to an n-D slice of memory";
let description = [{
@@ -1875,7 +1878,10 @@ def Vector_StoreOp : Vector_Op<"store", [
}
def Vector_MaskedLoadOp :
- Vector_Op<"maskedload", [DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>]>,
+ Vector_Op<"maskedload", [
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
+ ]>,
Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
Variadic<Index>:$indices,
VectorOfNonZeroRankOf<[I1]>:$mask,
@@ -1967,7 +1973,10 @@ def Vector_MaskedLoadOp :
}
def Vector_MaskedStoreOp :
- Vector_Op<"maskedstore", [DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>]>,
+ Vector_Op<"maskedstore", [
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
+ ]>,
Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
Variadic<Index>:$indices,
VectorOfNonZeroRankOf<[I1]>:$mask,
@@ -2048,7 +2057,8 @@ def Vector_GatherOp :
Vector_Op<"gather", [
DeclareOpInterfaceMethods<MaskableOpInterface>,
DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
- DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
+ DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
]>,
Arguments<(ins Arg<TensorOrMemRef<[AnyType]>, "", [MemRead]>:$base,
Variadic<Index>:$offsets,
@@ -2151,7 +2161,10 @@ def Vector_GatherOp :
}
def Vector_ScatterOp :
- Vector_Op<"scatter", [DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>]>,
+ Vector_Op<"scatter", [
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
+ ]>,
Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
Variadic<Index>:$offsets,
VectorOfNonZeroRankOf<[AnyInteger, Index]>:$indices,
@@ -2236,7 +2249,10 @@ def Vector_ScatterOp :
}
def Vector_ExpandLoadOp :
- Vector_Op<"expandload", [DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>]>,
+ Vector_Op<"expandload", [
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
+ ]>,
Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
Variadic<Index>:$indices,
FixedVectorOfNonZeroRankOf<[I1]>:$mask,
@@ -2324,7 +2340,10 @@ def Vector_ExpandLoadOp :
}
def Vector_CompressStoreOp :
- Vector_Op<"compressstore", [DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>]>,
+ Vector_Op<"compressstore", [
+ DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
+ DeclareOpInterfaceMethods<AlignmentAttrOpInterface>
+ ]>,
Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
Variadic<Index>:$indices,
FixedVectorOfNonZeroRankOf<[I1]>:$mask,
diff --git a/mlir/include/mlir/Interfaces/AlignmentAttrInterface.h b/mlir/include/mlir/Interfaces/AlignmentAttrInterface.h
new file mode 100644
index 000000000000..5b52c22d4a82
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/AlignmentAttrInterface.h
@@ -0,0 +1,21 @@
+//===- AlignmentAttrInterface.h - Alignment attribute interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_H
+#define MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_H
+
+#include "mlir/IR/OpDefinition.h"
+#include "llvm/Support/Alignment.h"
+
+namespace mlir {
+class MLIRContext;
+} // namespace mlir
+
+#include "mlir/Interfaces/AlignmentAttrInterface.h.inc"
+
+#endif // MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_H
diff --git a/mlir/include/mlir/Interfaces/AlignmentAttrInterface.td b/mlir/include/mlir/Interfaces/AlignmentAttrInterface.td
new file mode 100644
index 000000000000..931af6990f40
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/AlignmentAttrInterface.td
@@ -0,0 +1,65 @@
+//===- AlignmentAttrInterface.td - Alignment attribute interface -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an interface for operations that expose an optional
+// alignment attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_TD
+#define MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_TD
+
+include "mlir/IR/OpBase.td"
+
+def AlignmentAttrOpInterface : OpInterface<"AlignmentAttrOpInterface"> {
+ let description = [{
+ An interface for operations that carry an optional alignment attribute and
+ want to expose it as an `llvm::MaybeAlign` helper.
+ }];
+
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Returns the alignment encoded on the operation as an `llvm::MaybeAlign`.
+ Operations providing a differently named accessor can override the
+ default implementation.
+ }],
+ "::llvm::MaybeAlign",
+ "getMaybeAlign",
+ (ins),
+ [{
+ // Defensive: trait implementations are expected to validate power-of-two
+ // alignments, but we still guard against accidental misuse.
+ auto alignmentOpt = $_op.getAlignment();
+ if (!alignmentOpt || *alignmentOpt <= 0)
+ return ::llvm::MaybeAlign();
+ uint64_t value = static_cast<uint64_t>(*alignmentOpt);
+ if (!::llvm::isPowerOf2_64(value))
+ return ::llvm::MaybeAlign();
+ return ::llvm::MaybeAlign(value);
+ }]
+ >
+ ];
+
+ let extraTraitClassDeclaration = [{
+ ::llvm::MaybeAlign getMaybeAlign() {
+ // Defensive: trait implementations are expected to validate power-of-two
+ // alignments, but we still guard against accidental misuse.
+ auto alignmentOpt = (*static_cast<ConcreteOp *>(this)).getAlignment();
+ if (!alignmentOpt || *alignmentOpt <= 0)
+ return ::llvm::MaybeAlign();
+ uint64_t value = static_cast<uint64_t>(*alignmentOpt);
+ if (!::llvm::isPowerOf2_64(value))
+ return ::llvm::MaybeAlign();
+ return ::llvm::MaybeAlign(value);
+ }
+ }];
+}
+
+#endif // MLIR_INTERFACES_ALIGNMENTATTRINTERFACE_TD
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index 72ed046a1ba5..eb96a6886111 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_mlir_interface(AlignmentAttrInterface)
add_mlir_interface(CallInterfaces)
add_mlir_interface(CastInterfaces)
add_mlir_interface(ControlFlowInterfaces)
diff --git a/mlir/include/mlir/TableGen/CodeGenHelpers.h b/mlir/include/mlir/TableGen/CodeGenHelpers.h
index 997aef26bdc0..b56172f55a15 100644
--- a/mlir/include/mlir/TableGen/CodeGenHelpers.h
+++ b/mlir/include/mlir/TableGen/CodeGenHelpers.h
@@ -52,6 +52,15 @@ private:
std::optional<llvm::NamespaceEmitter> nsEmitter;
};
+/// This class represents how an error stream string being constructed will be
+/// consumed.
+enum class ErrorStreamType {
+ // Inside a string that's streamed into an InflightDiagnostic.
+ InString,
+ // Inside a string inside an OpError.
+ InsideOpError,
+};
+
/// This class deduplicates shared operation verification code by emitting
/// static functions alongside the op definitions. These methods are local to
/// the definition file, and are invoked within the operation verify methods.
@@ -192,7 +201,8 @@ private:
/// A generic function to emit constraints
void emitConstraints(const ConstraintMap &constraints, StringRef selfName,
- const char *codeTemplate);
+ const char *codeTemplate,
+ ErrorStreamType errorStreamType);
/// Assign a unique name to a unique constraint.
std::string getUniqueName(StringRef kind, unsigned index);
@@ -243,6 +253,18 @@ std::string stringify(T &&t) {
apply(std::forward<T>(t));
}
+/// Helper to generate a C++ streaming error message from a given message.
+/// Message can contain '{{...}}' placeholders that are substituted with
+/// C-expressions via tgfmt. It would effectively convert:
+/// "failed to verify {{foo}}"
+/// into:
+/// "failed to verify " << bar
+/// where bar is the result of evaluating 'tgfmt("foo", &ctx)' at compile
+/// time.
+std::string buildErrorStreamingString(
+ StringRef message, const FmtContext &ctx,
+ ErrorStreamType errorStreamType = ErrorStreamType::InString);
+
} // namespace tblgen
} // namespace mlir
diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
index 0dcdd5bb97bc..812043d1af0d 100644
--- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -716,7 +717,7 @@ bool IntegerRelation::isEmpty() const {
// that aren't the intended use case for IntegerRelation. This is
// needed since FM has a worst case exponential complexity in theory.
if (tmpCst.getNumConstraints() >= kExplosionFactor * getNumVars()) {
- LLVM_DEBUG(llvm::dbgs() << "FM constraint explosion detected\n");
+ LDBG() << "FM constraint explosion detected";
return false;
}
@@ -1943,7 +1944,7 @@ void IntegerRelation::removeTrivialRedundancy() {
// which can prove the existence of a solution if there is one.
void IntegerRelation::fourierMotzkinEliminate(unsigned pos, bool darkShadow,
bool *isResultIntegerExact) {
- LLVM_DEBUG(llvm::dbgs() << "FM input (eliminate pos " << pos << "):\n");
+ LDBG() << "FM input (eliminate pos " << pos << "):";
LLVM_DEBUG(dump());
assert(pos < getNumVars() && "invalid position");
assert(hasConsistentState());
@@ -1955,7 +1956,7 @@ void IntegerRelation::fourierMotzkinEliminate(unsigned pos, bool darkShadow,
LogicalResult ret = gaussianEliminateVar(pos);
(void)ret;
assert(ret.succeeded() && "Gaussian elimination guaranteed to succeed");
- LLVM_DEBUG(llvm::dbgs() << "FM output (through Gaussian elimination):\n");
+ LDBG() << "FM output (through Gaussian elimination):";
LLVM_DEBUG(dump());
return;
}
@@ -1969,7 +1970,7 @@ void IntegerRelation::fourierMotzkinEliminate(unsigned pos, bool darkShadow,
// If it doesn't appear, just remove the column and return.
// TODO: refactor removeColumns to use it from here.
removeVar(pos);
- LLVM_DEBUG(llvm::dbgs() << "FM output:\n");
+ LDBG() << "FM output:";
LLVM_DEBUG(dump());
return;
}
@@ -2052,8 +2053,7 @@ void IntegerRelation::fourierMotzkinEliminate(unsigned pos, bool darkShadow,
}
}
- LLVM_DEBUG(llvm::dbgs() << "FM isResultIntegerExact: " << allLCMsAreOne
- << "\n");
+ LDBG() << "FM isResultIntegerExact: " << allLCMsAreOne;
if (allLCMsAreOne && isResultIntegerExact)
*isResultIntegerExact = true;
@@ -2090,7 +2090,7 @@ void IntegerRelation::fourierMotzkinEliminate(unsigned pos, bool darkShadow,
newRel.normalizeConstraintsByGCD();
newRel.removeTrivialRedundancy();
clearAndCopyFrom(newRel);
- LLVM_DEBUG(llvm::dbgs() << "FM output:\n");
+ LDBG() << "FM output:";
LLVM_DEBUG(dump());
}
diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
index 03ed4d51cc74..b6099902cc33 100644
--- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
+++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp
@@ -36,20 +36,23 @@ namespace {
/// attribute.
template <typename SourceOp, typename TargetOp, bool Constrained,
template <typename, typename> typename AttrConvert =
- AttrConvertPassThrough>
+ AttrConvertPassThrough,
+ bool FailOnUnsupportedFP = false>
struct ConstrainedVectorConvertToLLVMPattern
- : public VectorConvertToLLVMPattern<SourceOp, TargetOp, AttrConvert> {
- using VectorConvertToLLVMPattern<SourceOp, TargetOp,
- AttrConvert>::VectorConvertToLLVMPattern;
+ : public VectorConvertToLLVMPattern<SourceOp, TargetOp, AttrConvert,
+ FailOnUnsupportedFP> {
+ using VectorConvertToLLVMPattern<
+ SourceOp, TargetOp, AttrConvert,
+ FailOnUnsupportedFP>::VectorConvertToLLVMPattern;
LogicalResult
matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
if (Constrained != static_cast<bool>(op.getRoundingModeAttr()))
return failure();
- return VectorConvertToLLVMPattern<SourceOp, TargetOp,
- AttrConvert>::matchAndRewrite(op, adaptor,
- rewriter);
+ return VectorConvertToLLVMPattern<
+ SourceOp, TargetOp, AttrConvert,
+ FailOnUnsupportedFP>::matchAndRewrite(op, adaptor, rewriter);
}
};
@@ -78,7 +81,8 @@ struct IdentityBitcastLowering final
using AddFOpLowering =
VectorConvertToLLVMPattern<arith::AddFOp, LLVM::FAddOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using AddIOpLowering =
VectorConvertToLLVMPattern<arith::AddIOp, LLVM::AddOp,
arith::AttrConvertOverflowToLLVM>;
@@ -87,53 +91,67 @@ using BitcastOpLowering =
VectorConvertToLLVMPattern<arith::BitcastOp, LLVM::BitcastOp>;
using DivFOpLowering =
VectorConvertToLLVMPattern<arith::DivFOp, LLVM::FDivOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using DivSIOpLowering =
VectorConvertToLLVMPattern<arith::DivSIOp, LLVM::SDivOp>;
using DivUIOpLowering =
VectorConvertToLLVMPattern<arith::DivUIOp, LLVM::UDivOp>;
-using ExtFOpLowering = VectorConvertToLLVMPattern<arith::ExtFOp, LLVM::FPExtOp>;
+using ExtFOpLowering = VectorConvertToLLVMPattern<arith::ExtFOp, LLVM::FPExtOp,
+ AttrConvertPassThrough,
+ /*FailOnUnsupportedFP=*/true>;
using ExtSIOpLowering =
VectorConvertToLLVMPattern<arith::ExtSIOp, LLVM::SExtOp>;
using ExtUIOpLowering =
VectorConvertToLLVMPattern<arith::ExtUIOp, LLVM::ZExtOp>;
using FPToSIOpLowering =
- VectorConvertToLLVMPattern<arith::FPToSIOp, LLVM::FPToSIOp>;
+ VectorConvertToLLVMPattern<arith::FPToSIOp, LLVM::FPToSIOp,
+ AttrConvertPassThrough,
+ /*FailOnUnsupportedFP=*/true>;
using FPToUIOpLowering =
- VectorConvertToLLVMPattern<arith::FPToUIOp, LLVM::FPToUIOp>;
+ VectorConvertToLLVMPattern<arith::FPToUIOp, LLVM::FPToUIOp,
+ AttrConvertPassThrough,
+ /*FailOnUnsupportedFP=*/true>;
using MaximumFOpLowering =
VectorConvertToLLVMPattern<arith::MaximumFOp, LLVM::MaximumOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using MaxNumFOpLowering =
VectorConvertToLLVMPattern<arith::MaxNumFOp, LLVM::MaxNumOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using MaxSIOpLowering =
VectorConvertToLLVMPattern<arith::MaxSIOp, LLVM::SMaxOp>;
using MaxUIOpLowering =
VectorConvertToLLVMPattern<arith::MaxUIOp, LLVM::UMaxOp>;
using MinimumFOpLowering =
VectorConvertToLLVMPattern<arith::MinimumFOp, LLVM::MinimumOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using MinNumFOpLowering =
VectorConvertToLLVMPattern<arith::MinNumFOp, LLVM::MinNumOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using MinSIOpLowering =
VectorConvertToLLVMPattern<arith::MinSIOp, LLVM::SMinOp>;
using MinUIOpLowering =
VectorConvertToLLVMPattern<arith::MinUIOp, LLVM::UMinOp>;
using MulFOpLowering =
VectorConvertToLLVMPattern<arith::MulFOp, LLVM::FMulOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using MulIOpLowering =
VectorConvertToLLVMPattern<arith::MulIOp, LLVM::MulOp,
arith::AttrConvertOverflowToLLVM>;
using NegFOpLowering =
VectorConvertToLLVMPattern<arith::NegFOp, LLVM::FNegOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using OrIOpLowering = VectorConvertToLLVMPattern<arith::OrIOp, LLVM::OrOp>;
using RemFOpLowering =
VectorConvertToLLVMPattern<arith::RemFOp, LLVM::FRemOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using RemSIOpLowering =
VectorConvertToLLVMPattern<arith::RemSIOp, LLVM::SRemOp>;
using RemUIOpLowering =
@@ -151,21 +169,25 @@ using SIToFPOpLowering =
VectorConvertToLLVMPattern<arith::SIToFPOp, LLVM::SIToFPOp>;
using SubFOpLowering =
VectorConvertToLLVMPattern<arith::SubFOp, LLVM::FSubOp,
- arith::AttrConvertFastMathToLLVM>;
+ arith::AttrConvertFastMathToLLVM,
+ /*FailOnUnsupportedFP=*/true>;
using SubIOpLowering =
VectorConvertToLLVMPattern<arith::SubIOp, LLVM::SubOp,
arith::AttrConvertOverflowToLLVM>;
using TruncFOpLowering =
ConstrainedVectorConvertToLLVMPattern<arith::TruncFOp, LLVM::FPTruncOp,
- false>;
+ false, AttrConvertPassThrough,
+ /*FailOnUnsupportedFP=*/true>;
using ConstrainedTruncFOpLowering = ConstrainedVectorConvertToLLVMPattern<
arith::TruncFOp, LLVM::ConstrainedFPTruncIntr, true,
- arith::AttrConverterConstrainedFPToLLVM>;
+ arith::AttrConverterConstrainedFPToLLVM, /*FailOnUnsupportedFP=*/true>;
using TruncIOpLowering =
VectorConvertToLLVMPattern<arith::TruncIOp, LLVM::TruncOp,
arith::AttrConvertOverflowToLLVM>;
using UIToFPOpLowering =
- VectorConvertToLLVMPattern<arith::UIToFPOp, LLVM::UIToFPOp>;
+ VectorConvertToLLVMPattern<arith::UIToFPOp, LLVM::UIToFPOp,
+ AttrConvertPassThrough,
+ /*FailOnUnsupportedFP=*/true>;
using XOrIOpLowering = VectorConvertToLLVMPattern<arith::XOrIOp, LLVM::XOrOp>;
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
index 6c08cdfb669f..d6c3cd62ee74 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
@@ -21,25 +21,6 @@ using namespace mlir::bufferization;
#include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.cpp.inc"
-/// Attribute name used to mark function arguments who's buffers can be written
-/// to during One-Shot Module Bufferize.
-constexpr const ::llvm::StringLiteral BufferizationDialect::kWritableAttrName;
-
-/// Attribute name used to mark the bufferization layout for region arguments
-/// during One-Shot Module Bufferize.
-constexpr const ::llvm::StringLiteral
- BufferizationDialect::kBufferLayoutAttrName;
-
-/// An attribute that can be attached to ops with an allocation and/or
-/// deallocation side effect. It indicates that the op is under a "manual
-/// deallocation" scheme. In the case of an allocation op, the returned
-/// value is *not* an automatically managed allocation and assigned an
-/// ownership of "false". Furthermore, only deallocation ops that are
-/// guaranteed to deallocate a buffer under "manual deallocation" are
-/// allowed to have this attribute. (Deallocation ops without this
-/// attribute are rejected by the ownership-based buffer deallocation pass.)
-constexpr const ::llvm::StringLiteral BufferizationDialect::kManualDeallocation;
-
//===----------------------------------------------------------------------===//
// Bufferization Dialect Interfaces
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 3a433825fd31..aa8206347e9b 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -997,8 +997,11 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
// Iterate over the outputs of the producer and over the loop bbArgs and
// check if any bbArg points to the same value as the producer output. In
// such case, make the producer output point to the bbArg directly.
- for (OpOperand &initOperandPtr :
- cast<DestinationStyleOpInterface>(clone).getDpsInitsMutable()) {
+ auto dpsInterface = dyn_cast<DestinationStyleOpInterface>(clone);
+ if (!dpsInterface)
+ return;
+
+ for (OpOperand &initOperandPtr : dpsInterface.getDpsInitsMutable()) {
Value producerOperand =
clone->getOperand(initOperandPtr.getOperandNumber());
for (BlockArgument containerIterArg :
@@ -1060,7 +1063,7 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
resultNumber, offsets, sizes);
// Cleanup clone.
- if (dyn_cast<LoopLikeOpInterface>(containingOp))
+ if (isa<LoopLikeOpInterface>(containingOp))
rewriter.eraseOp(tileableProducer);
return std::make_tuple(tileAndFuseResult->tiledOps, newContainingOp);
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index b2f1d840f3bc..8c9c137b8aeb 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1042,6 +1042,65 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
}
};
+/// Remove empty acc.kernel_environment operations. If the operation has wait
+/// operands, create a acc.wait operation to preserve synchronization.
+struct RemoveEmptyKernelEnvironment
+ : public OpRewritePattern<acc::KernelEnvironmentOp> {
+ using OpRewritePattern<acc::KernelEnvironmentOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(acc::KernelEnvironmentOp op,
+ PatternRewriter &rewriter) const override {
+ assert(op->getNumRegions() == 1 && "expected op to have one region");
+
+ Block &block = op.getRegion().front();
+ if (!block.empty())
+ return failure();
+
+ // Conservatively disable canonicalization of empty acc.kernel_environment
+ // operations if the wait operands in the kernel_environment cannot be fully
+ // represented by acc.wait operation.
+
+ // Disable canonicalization if device type is not the default
+ if (auto deviceTypeAttr = op.getWaitOperandsDeviceTypeAttr()) {
+ for (auto attr : deviceTypeAttr) {
+ if (auto dtAttr = mlir::dyn_cast<acc::DeviceTypeAttr>(attr)) {
+ if (dtAttr.getValue() != mlir::acc::DeviceType::None)
+ return failure();
+ }
+ }
+ }
+
+ // Disable canonicalization if any wait segment has a devnum
+ if (auto hasDevnumAttr = op.getHasWaitDevnumAttr()) {
+ for (auto attr : hasDevnumAttr) {
+ if (auto boolAttr = mlir::dyn_cast<mlir::BoolAttr>(attr)) {
+ if (boolAttr.getValue())
+ return failure();
+ }
+ }
+ }
+
+ // Disable canonicalization if there are multiple wait segments
+ if (auto segmentsAttr = op.getWaitOperandsSegmentsAttr()) {
+ if (segmentsAttr.size() > 1)
+ return failure();
+ }
+
+ // Remove empty kernel environment.
+ // Preserve synchronization by creating acc.wait operation if needed.
+ if (!op.getWaitOperands().empty() || op.getWaitOnlyAttr())
+ rewriter.replaceOpWithNewOp<acc::WaitOp>(op, op.getWaitOperands(),
+ /*asyncOperand=*/Value(),
+ /*waitDevnum=*/Value(),
+ /*async=*/nullptr,
+ /*ifCond=*/Value());
+ else
+ rewriter.eraseOp(op);
+
+ return success();
+ }
+};
+
//===----------------------------------------------------------------------===//
// Recipe Region Helpers
//===----------------------------------------------------------------------===//
@@ -2691,6 +2750,15 @@ void acc::HostDataOp::getCanonicalizationPatterns(RewritePatternSet &results,
}
//===----------------------------------------------------------------------===//
+// KernelEnvironmentOp
+//===----------------------------------------------------------------------===//
+
+void acc::KernelEnvironmentOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results.add<RemoveEmptyKernelEnvironment>(context);
+}
+
+//===----------------------------------------------------------------------===//
// LoopOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Interfaces/AlignmentAttrInterface.cpp b/mlir/lib/Interfaces/AlignmentAttrInterface.cpp
new file mode 100644
index 000000000000..fe985adb5e79
--- /dev/null
+++ b/mlir/lib/Interfaces/AlignmentAttrInterface.cpp
@@ -0,0 +1,13 @@
+//===- AlignmentAttrInterface.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/AlignmentAttrInterface.h"
+
+using namespace mlir;
+
+#include "mlir/Interfaces/AlignmentAttrInterface.cpp.inc"
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index f96af02db0be..ad3e2b61be41 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -1,4 +1,5 @@
set(LLVM_OPTIONAL_SOURCES
+ AlignmentAttrInterface.cpp
CallInterfaces.cpp
CastInterfaces.cpp
ControlFlowInterfaces.cpp
@@ -41,6 +42,7 @@ function(add_mlir_interface_library name)
endfunction(add_mlir_interface_library)
+add_mlir_interface_library(AlignmentAttrInterface)
add_mlir_interface_library(CallInterfaces)
add_mlir_interface_library(CastInterfaces)
add_mlir_interface_library(ControlFlowInterfaces)
diff --git a/mlir/lib/TableGen/CodeGenHelpers.cpp b/mlir/lib/TableGen/CodeGenHelpers.cpp
index d52d5e769ee6..9ad031eb701a 100644
--- a/mlir/lib/TableGen/CodeGenHelpers.cpp
+++ b/mlir/lib/TableGen/CodeGenHelpers.cpp
@@ -12,12 +12,26 @@
//===----------------------------------------------------------------------===//
#include "mlir/TableGen/CodeGenHelpers.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/TableGen/Argument.h"
+#include "mlir/TableGen/Attribute.h"
+#include "mlir/TableGen/Format.h"
#include "mlir/TableGen/Operator.h"
#include "mlir/TableGen/Pattern.h"
+#include "mlir/TableGen/Property.h"
+#include "mlir/TableGen/Region.h"
+#include "mlir/TableGen/Successor.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/CodeGenHelpers.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include <cassert>
+#include <optional>
+#include <string>
using namespace llvm;
using namespace mlir;
@@ -112,6 +126,55 @@ StringRef StaticVerifierFunctionEmitter::getRegionConstraintFn(
// Constraint Emission
//===----------------------------------------------------------------------===//
+/// Helper to generate a C++ string expression from a given message.
+/// Message can contain '{{...}}' placeholders that are substituted with
+/// C-expressions via tgfmt.
+std::string mlir::tblgen::buildErrorStreamingString(
+ StringRef message, const FmtContext &ctx, ErrorStreamType errorStreamType) {
+ std::string result;
+ raw_string_ostream os(result);
+
+ std::string msgStr = escapeString(message);
+ StringRef msg = msgStr;
+
+ // Split the message by '{{' and '}}' and build a streaming expression.
+ auto split = msg.split("{{");
+ os << split.first;
+ if (split.second.empty()) {
+ return msgStr;
+ }
+
+ if (errorStreamType == ErrorStreamType::InsideOpError)
+ os << "\")";
+ else
+ os << '"';
+
+ msg = split.second;
+ while (!msg.empty()) {
+ split = msg.split("}}");
+ StringRef var = split.first;
+ StringRef rest = split.second;
+
+ os << " << " << tgfmt(var, &ctx);
+
+ if (rest.empty())
+ break;
+
+ split = rest.split("{{");
+ if (split.second.empty() &&
+ errorStreamType == ErrorStreamType::InsideOpError) {
+ // To enable having part of string post, this adds a parenthesis before
+ // the last string segment to match the existing one.
+ os << " << (\"" << split.first;
+ } else {
+ os << " << \"" << split.first;
+ }
+ msg = split.second;
+ }
+
+ return os.str();
+}
+
/// Code templates for emitting type, attribute, successor, and region
/// constraints. Each of these templates require the following arguments:
///
@@ -224,22 +287,24 @@ static ::llvm::LogicalResult {0}(
void StaticVerifierFunctionEmitter::emitConstraints(
const ConstraintMap &constraints, StringRef selfName,
- const char *const codeTemplate) {
+ const char *const codeTemplate, ErrorStreamType errorStreamType) {
FmtContext ctx;
ctx.addSubst("_op", "*op").withSelf(selfName);
+
for (auto &it : constraints) {
os << formatv(codeTemplate, it.second,
tgfmt(it.first.getConditionTemplate(), &ctx),
- escapeString(it.first.getSummary()));
+ buildErrorStreamingString(it.first.getSummary(), ctx));
}
}
-
void StaticVerifierFunctionEmitter::emitTypeConstraints() {
- emitConstraints(typeConstraints, "type", typeConstraintCode);
+ emitConstraints(typeConstraints, "type", typeConstraintCode,
+ ErrorStreamType::InString);
}
void StaticVerifierFunctionEmitter::emitAttrConstraints() {
- emitConstraints(attrConstraints, "attr", attrConstraintCode);
+ emitConstraints(attrConstraints, "attr", attrConstraintCode,
+ ErrorStreamType::InString);
}
/// Unlike with the other helpers, this one has to substitute in the interface
@@ -251,17 +316,19 @@ void StaticVerifierFunctionEmitter::emitPropConstraints() {
auto propConstraint = cast<PropConstraint>(it.first);
os << formatv(propConstraintCode, it.second,
tgfmt(propConstraint.getConditionTemplate(), &ctx),
- escapeString(it.first.getSummary()),
+ buildErrorStreamingString(it.first.getSummary(), ctx),
propConstraint.getInterfaceType());
}
}
void StaticVerifierFunctionEmitter::emitSuccessorConstraints() {
- emitConstraints(successorConstraints, "successor", successorConstraintCode);
+ emitConstraints(successorConstraints, "successor", successorConstraintCode,
+ ErrorStreamType::InString);
}
void StaticVerifierFunctionEmitter::emitRegionConstraints() {
- emitConstraints(regionConstraints, "region", regionConstraintCode);
+ emitConstraints(regionConstraints, "region", regionConstraintCode,
+ ErrorStreamType::InString);
}
void StaticVerifierFunctionEmitter::emitPatternConstraints() {
@@ -270,13 +337,14 @@ void StaticVerifierFunctionEmitter::emitPatternConstraints() {
for (auto &it : typeConstraints) {
os << formatv(patternConstraintCode, it.second,
tgfmt(it.first.getConditionTemplate(), &ctx),
- escapeString(it.first.getSummary()), "::mlir::Type type");
+ buildErrorStreamingString(it.first.getSummary(), ctx),
+ "::mlir::Type type");
}
ctx.withSelf("attr");
for (auto &it : attrConstraints) {
os << formatv(patternConstraintCode, it.second,
tgfmt(it.first.getConditionTemplate(), &ctx),
- escapeString(it.first.getSummary()),
+ buildErrorStreamingString(it.first.getSummary(), ctx),
"::mlir::Attribute attr");
}
ctx.withSelf("prop");
@@ -291,7 +359,7 @@ void StaticVerifierFunctionEmitter::emitPatternConstraints() {
}
os << formatv(patternConstraintCode, it.second,
tgfmt(propConstraint.getConditionTemplate(), &ctx),
- escapeString(propConstraint.getSummary()),
+ buildErrorStreamingString(propConstraint.getSummary(), ctx),
Twine(interfaceType) + " prop");
}
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 0964e1b8c5ef..cecff51e637a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -291,6 +291,20 @@ static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
llvm_unreachable("Unsupported proxy kinds");
}
+static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
+ switch (scope) {
+ case NVVM::MemScopeKind::CTA:
+ return llvm::Intrinsic::nvvm_membar_cta;
+ case NVVM::MemScopeKind::CLUSTER:
+ return llvm::Intrinsic::nvvm_fence_sc_cluster;
+ case NVVM::MemScopeKind::GPU:
+ return llvm::Intrinsic::nvvm_membar_gl;
+ case NVVM::MemScopeKind::SYS:
+ return llvm::Intrinsic::nvvm_membar_sys;
+ }
+ llvm_unreachable("Unknown scope for memory barrier");
+}
+
#define TCGEN05LD(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_ld_##SHAPE##_##NUM
static llvm::Intrinsic::ID
diff --git a/mlir/python/mlir/dialects/transform/__init__.py b/mlir/python/mlir/dialects/transform/__init__.py
index b075919d1ef0..de414dc52c0a 100644
--- a/mlir/python/mlir/dialects/transform/__init__.py
+++ b/mlir/python/mlir/dialects/transform/__init__.py
@@ -39,16 +39,32 @@ class CastOp(CastOp):
super().__init__(result_type, _get_op_result_or_value(target), loc=loc, ip=ip)
+def cast(
+ result_type: Type, target: Union[Operation, Value], *, loc=None, ip=None
+) -> OpResult:
+ return CastOp(result_type=result_type, target=target, loc=loc, ip=ip).result
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class ApplyPatternsOp(ApplyPatternsOp):
def __init__(
self,
target: Union[Operation, Value, OpView],
+ apply_cse: bool = False,
+ max_iterations: Optional[Union[IntegerAttr, int]] = None,
+ max_num_rewrites: Optional[Union[IntegerAttr, int]] = None,
*,
loc=None,
ip=None,
):
- super().__init__(target, loc=loc, ip=ip)
+ super().__init__(
+ target,
+ apply_cse=apply_cse,
+ max_iterations=max_iterations,
+ max_num_rewrites=max_num_rewrites,
+ loc=loc,
+ ip=ip,
+ )
self.regions[0].blocks.append()
@property
@@ -56,6 +72,25 @@ class ApplyPatternsOp(ApplyPatternsOp):
return self.regions[0].blocks[0]
+def apply_patterns(
+ target: Union[Operation, Value, OpView],
+ apply_cse: bool = False,
+ max_iterations: Optional[Union[IntegerAttr, int]] = None,
+ max_num_rewrites: Optional[Union[IntegerAttr, int]] = None,
+ *,
+ loc=None,
+ ip=None,
+) -> ApplyPatternsOp:
+ return ApplyPatternsOp(
+ target=target,
+ apply_cse=apply_cse,
+ max_iterations=max_iterations,
+ max_num_rewrites=max_num_rewrites,
+ loc=loc,
+ ip=ip,
+ )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class GetParentOp(GetParentOp):
def __init__(
@@ -64,6 +99,7 @@ class GetParentOp(GetParentOp):
target: Union[Operation, Value],
*,
isolated_from_above: bool = False,
+ allow_empty_results: bool = False,
op_name: Optional[str] = None,
deduplicate: bool = False,
nth_parent: int = 1,
@@ -74,6 +110,7 @@ class GetParentOp(GetParentOp):
result_type,
_get_op_result_or_value(target),
isolated_from_above=isolated_from_above,
+ allow_empty_results=allow_empty_results,
op_name=op_name,
deduplicate=deduplicate,
nth_parent=nth_parent,
@@ -82,6 +119,31 @@ class GetParentOp(GetParentOp):
)
+def get_parent_op(
+ result_type: Type,
+ target: Union[Operation, Value],
+ *,
+ isolated_from_above: bool = False,
+ allow_empty_results: bool = False,
+ op_name: Optional[str] = None,
+ deduplicate: bool = False,
+ nth_parent: int = 1,
+ loc=None,
+ ip=None,
+) -> OpResult:
+ return GetParentOp(
+ result_type=result_type,
+ target=target,
+ isolated_from_above=isolated_from_above,
+ allow_empty_results=allow_empty_results,
+ op_name=op_name,
+ deduplicate=deduplicate,
+ nth_parent=nth_parent,
+ loc=loc,
+ ip=ip,
+ ).result
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class MergeHandlesOp(MergeHandlesOp):
def __init__(
@@ -89,17 +151,32 @@ class MergeHandlesOp(MergeHandlesOp):
handles: Sequence[Union[Operation, Value]],
*,
deduplicate: bool = False,
+ results: Optional[Sequence[Type]] = None,
loc=None,
ip=None,
):
super().__init__(
[_get_op_result_or_value(h) for h in handles],
deduplicate=deduplicate,
+ results=results,
loc=loc,
ip=ip,
)
+def merge_handles(
+ handles: Sequence[Union[Operation, Value]],
+ *,
+ deduplicate: bool = False,
+ results: Optional[Sequence[Type]] = None,
+ loc=None,
+ ip=None,
+) -> OpResult:
+ return MergeHandlesOp(
+ handles=handles, deduplicate=deduplicate, results=results, loc=loc, ip=ip
+ ).result
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class ReplicateOp(ReplicateOp):
def __init__(
@@ -119,16 +196,31 @@ class ReplicateOp(ReplicateOp):
)
+def replicate(
+ pattern: Union[Operation, Value],
+ handles: Sequence[Union[Operation, Value]],
+ *,
+ loc=None,
+ ip=None,
+) -> Union[OpResult, OpResultList, ReplicateOp]:
+ op = ReplicateOp(pattern=pattern, handles=handles, loc=loc, ip=ip)
+ results = op.results
+ return results if len(results) > 1 else (results[0] if len(results) == 1 else op)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class SequenceOp(SequenceOp):
def __init__(
self,
- failure_propagation_mode,
+ failure_propagation_mode: FailurePropagationMode,
results: Sequence[Type],
target: Union[Operation, Value, Type],
extra_bindings: Optional[
Union[Sequence[Value], Sequence[Type], Operation, OpView]
] = None,
+ *,
+ loc=None,
+ ip=None,
):
root = (
_get_op_result_or_value(target)
@@ -155,6 +247,8 @@ class SequenceOp(SequenceOp):
failure_propagation_mode=failure_propagation_mode,
root=root,
extra_bindings=extra_bindings,
+ loc=loc,
+ ip=ip,
)
self.regions[0].blocks.append(*tuple([root_type] + extra_binding_types))
@@ -171,16 +265,42 @@ class SequenceOp(SequenceOp):
return self.body.arguments[1:]
+def sequence(
+ failure_propagation_mode: FailurePropagationMode,
+ results: Sequence[Type],
+ target: Union[Operation, Value, Type],
+ extra_bindings: Optional[
+ Union[Sequence[Value], Sequence[Type], Operation, OpView]
+ ] = None,
+ *,
+ loc=None,
+ ip=None,
+) -> Union[OpResult, OpResultList, SequenceOp]:
+ op = SequenceOp(
+ results=results,
+ failure_propagation_mode=failure_propagation_mode,
+ extra_bindings=extra_bindings,
+ target=target,
+ loc=loc,
+ ip=ip,
+ )
+ results = op.results
+ return results if len(results) > 1 else (results[0] if len(results) == 1 else op)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class NamedSequenceOp(NamedSequenceOp):
def __init__(
self,
- sym_name,
+ sym_name: Union[str, SymbolRefAttr],
input_types: Sequence[Type],
result_types: Sequence[Type],
- sym_visibility=None,
- arg_attrs=None,
- res_attrs=None,
+ *,
+ sym_visibility: Optional[Union[str, StringAttr]] = None,
+ arg_attrs: Optional[Union[Sequence[dict], "DictArrayAttr"]] = None,
+ res_attrs: Optional[Union[Sequence[dict], "DictArrayAttr"]] = None,
+ loc=None,
+ ip=None,
):
function_type = FunctionType.get(input_types, result_types)
super().__init__(
@@ -205,6 +325,29 @@ class NamedSequenceOp(NamedSequenceOp):
return self.body.arguments[1:]
+def named_sequence(
+ sym_name: Union[str, SymbolRefAttr],
+ input_types: Sequence[Type],
+ result_types: Sequence[Type],
+ *,
+ sym_visibility: Optional[Union[str, StringAttr]] = None,
+ arg_attrs: Optional[Union[Sequence[dict], "DictArrayAttr"]] = None,
+ res_attrs: Optional[Union[Sequence[dict], "DictArrayAttr"]] = None,
+ loc=None,
+ ip=None,
+) -> NamedSequenceOp:
+ return NamedSequenceOp(
+ sym_name=sym_name,
+ input_types=input_types,
+ result_types=result_types,
+ sym_visibility=sym_visibility,
+ arg_attrs=arg_attrs,
+ res_attrs=res_attrs,
+ loc=loc,
+ ip=ip,
+ )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class YieldOp(YieldOp):
def __init__(
@@ -219,6 +362,12 @@ class YieldOp(YieldOp):
super().__init__(_get_op_results_or_values(operands), loc=loc, ip=ip)
+def yield_(
+ operands: Optional[Union[Operation, Sequence[Value]]] = None, *, loc=None, ip=None
+) -> YieldOp:
+ return YieldOp(operands=operands, loc=loc, ip=ip)
+
+
OptionValueTypes = Union[
Sequence["OptionValueTypes"], Attribute, Value, Operation, OpView, str, int, bool
]
@@ -247,7 +396,7 @@ class ApplyRegisteredPassOp(ApplyRegisteredPassOp):
def option_value_to_attr(value):
nonlocal cur_param_operand_idx
if isinstance(value, (Value, Operation, OpView)):
- dynamic_options.append(_get_op_result_or_value(value))
+ dynamic_options.append(value)
cur_param_operand_idx += 1
return ParamOperandAttr(cur_param_operand_idx - 1, context)
elif isinstance(value, Attribute):
diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
index b5dcb01d3dc6..5f1ec66234df 100644
--- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
+++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
@@ -754,11 +754,13 @@ func.func @memref_bitcast(%1: memref<?xi16>) -> memref<?xbf16> {
// CHECK: arith.addf {{.*}} : f4E2M1FN
// CHECK: arith.addf {{.*}} : vector<4xf4E2M1FN>
// CHECK: arith.addf {{.*}} : vector<8x4xf4E2M1FN>
-func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>) -> (f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>) {
+// CHECK: llvm.select {{.*}} : i1, i4
+func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>, %arg3: f4E2M1FN, %arg4: i1) {
%0 = arith.addf %arg0, %arg0 : f4E2M1FN
%1 = arith.addf %arg1, %arg1 : vector<4xf4E2M1FN>
%2 = arith.addf %arg2, %arg2 : vector<8x4xf4E2M1FN>
- return %0, %1, %2 : f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>
+ %3 = arith.select %arg4, %arg0, %arg3 : f4E2M1FN
+ return
}
// -----
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
index 9c552d849c12..d606cf51435d 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
@@ -1,15 +1,16 @@
-// RUN: mlir-opt %s --split-input-file -convert-xegpu-to-xevm | FileCheck %s
+// RUN: mlir-opt %s --split-input-file -convert-xegpu-to-xevm -canonicalize | FileCheck %s
gpu.module @test {
// CHECK-LABEL: @load_gather_i64_src_value_offset
-// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>
-gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>) {
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
+// CHECK-SAME: %[[ARG3:.*]]: vector<1xi1>
+gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>, %mask: vector<1xi1>) {
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
+ // CHECK: %[[C2_I64:.*]] = arith.constant 2 : i64
+ // CHECK: %[[VAR2:.*]] = vector.extract %[[ARG3]][0] : i1 from vector<1xi1>
// CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex>
// CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64
- // CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<1xi1>
- // CHECK: %[[VAR2:.*]] = vector.extract %[[CST]][0] : i1 from vector<1xi1>
- %1 = arith.constant dense<1>: vector<1xi1>
- // CHECK: %[[C2_I64:.*]] = arith.constant 2 : i64
// CHECK: %[[VAR3:.*]] = arith.muli %[[VAR1]], %[[C2_I64]] : i64
// CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
@@ -17,11 +18,12 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
// CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16
// CHECK: scf.yield %[[VAR7]] : f16
// CHECK: } else {
- // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
// CHECK: scf.yield %[[CST_0]] : f16
// CHECK: }
- %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ %0 = xegpu.load %src[%offset], %mask <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
: i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+ %c0 = arith.constant 0 : index
+ vector.store %0, %dst[%c0] : memref<1xf16>, vector<1xf16>
gpu.return
}
}
@@ -30,16 +32,16 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
gpu.module @test {
// CHECK-LABEL: @source_materialize_single_elem_vec
// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
-gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) {
- %1 = arith.constant dense<1>: vector<1xi1>
- %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+// CHECK-SAME: %[[ARG3:.*]]: vector<1xi1>
+gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>, %mask: vector<1xi1>) {
+ %0 = xegpu.load %src[%offset], %mask <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
: i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[VAR_IF:.*]] = scf.if
// CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16>
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16>
%c0 = arith.constant 0 : index
- vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16>
+ vector.store %0, %dst[%c0] : memref<1xf16>, vector<1xf16>
gpu.return
}
}
@@ -48,24 +50,21 @@ gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>
gpu.module @test {
// CHECK-LABEL: @store_scatter_i64_src_value_offset
-// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>
-gpu.func @store_scatter_i64_src_value_offset(%src: i64, %offset: vector<1xindex>) {
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: vector<1xi1>
+gpu.func @store_scatter_i64_src_value_offset(%src: i64, %offset: vector<1xindex>, %mask: vector<1xi1>) {
+ // CHECK: %[[CST_0:.*]] = arith.constant 2.900000e+00 : f32
+ // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
+ // CHECK: %[[VAR2:.*]] = vector.extract %[[ARG2]][0] : i1 from vector<1xi1>
// CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex>
// CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64
- // CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<1xi1>
- // CHECK: %[[VAR2:.*]] = vector.extract %[[CST]][0] : i1 from vector<1xi1>
- %1 = arith.constant dense<1>: vector<1xi1>
- // CHECK: %[[CST_0:.*]] = arith.constant dense<2.900000e+00> : vector<1xf32>
- // CHECK: %[[VAR3:.*]] = vector.extract %[[CST_0]][0] : f32 from vector<1xf32>
- %2 = arith.constant dense<2.9>: vector<1xf32>
- // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
+ %0 = arith.constant dense<2.9>: vector<1xf32>
// CHECK: %[[VAR4:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64
// CHECK: %[[VAR5:.*]] = arith.addi %[[ARG0]], %[[VAR4]] : i64
// CHECK: %[[VAR6:.*]] = llvm.inttoptr %[[VAR5]] : i64 to !llvm.ptr<1>
// CHECK: scf.if %[[VAR2]] {
- // CHECK: llvm.store %[[VAR3]], %[[VAR6]] {cache_control = #xevm.store_cache_control<L1wb_L2uc_L3uc>} : f32, !llvm.ptr<1>
+ // CHECK: llvm.store %[[CST_0]], %[[VAR6]] {cache_control = #xevm.store_cache_control<L1wb_L2uc_L3uc>} : f32, !llvm.ptr<1>
// CHECK: }
- xegpu.store %2, %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
+ xegpu.store %0, %src[%offset], %mask <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
: vector<1xf32>, i64, vector<1xindex>, vector<1xi1>
gpu.return
}
@@ -76,9 +75,9 @@ gpu.module @test {
// CHECK-LABEL: @prefetch_i64_src_value_offset
// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>
gpu.func @prefetch_i64_src_value_offset(%src: i64, %offset: vector<1xindex>) {
+ // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
// CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex>
// CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64
- // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
// CHECK: %[[VAR2:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64
// CHECK: %[[VAR3:.*]] = arith.addi %[[ARG0]], %[[VAR2]] : i64
// CHECK: %[[VAR4:.*]] = llvm.inttoptr %[[VAR3]] : i64 to !llvm.ptr<1>
@@ -94,11 +93,11 @@ gpu.module @test {
// CHECK-LABEL: @prefetch_memref_src_value_offset
// CHECK-SAME: %[[ARG0:.*]]: memref<256xf32>, %[[ARG1:.*]]: vector<1xindex>
gpu.func @prefetch_memref_src_value_offset(%src: memref<256xf32>, %offset: vector<1xindex>) {
+ // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
// CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex>
// CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64
// CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref<256xf32> -> index
// CHECK: %[[VAR2:.*]] = arith.index_castui %[[INTPTR]] : index to i64
- // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64
// CHECK: %[[VAR3:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64
// CHECK: %[[VAR4:.*]] = arith.addi %[[VAR2]], %[[VAR3]] : i64
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
diff --git a/mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir b/mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir
index 873478aed57e..e4b303087ea9 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir
@@ -1,34 +1,29 @@
-// RUN: mlir-opt -convert-xegpu-to-xevm -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -convert-xegpu-to-xevm -canonicalize %s | FileCheck %s
-gpu.module @fence_check {
- gpu.func @fence(%src: memref<8x16xf32, 1>, %dst: memref<8x16xf32, 1>) kernel {
+gpu.module @prefetch_nd_check {
+ // CHECK-LABEL: gpu.func @prefetch_nd
+ gpu.func @prefetch_nd(%src: memref<8x16xf32, 1>, %dst: memref<8x16xf32, 1>) kernel {
+ // CHECK: %[[PREF_BASE_ROW_IN_BYTES:.*]] = arith.constant 64 : i32
+ // CHECK: %[[LD_CREATE_DESC_I64:.*]] = arith.constant dense<0> : vector<4xi64>
+ // CHECK: %[[PREF_BASE_H:.*]] = arith.constant 8 : i32
+ // CHECK: %[[PREF_BASE_W:.*]] = arith.constant 16 : i32
+ // CHECK: %[[OFFSET_ZERO:.*]] = arith.constant 0 : i32
%srcce = memref.memory_space_cast %src : memref<8x16xf32, 1> to memref<8x16xf32>
- %dstte = memref.memory_space_cast %dst : memref<8x16xf32, 1> to memref<8x16xf32>
-
// CHECK: %[[LD_PTR_AS_I64:.*]] = arith.index_castui {{.*}} : index to i64
- // CHECK: %[[LD_CREATE_DESC_I64:.*]] = vector.bitcast {{.*}} : vector<8xi32> to vector<4xi64>
// CHECK: %[[LD_DESC_0:.*]] = vector.insert %[[LD_PTR_AS_I64]], %[[LD_CREATE_DESC_I64]] [0] : i64 into vector<4xi64>
// CHECK: %[[LD_DESC_1:.*]] = vector.bitcast %[[LD_DESC_0]] : vector<4xi64> to vector<8xi32>
- // CHECK: %[[LD_DESC_2:.*]] = vector.insert {{.*}}, %[[LD_DESC_1]] [2] : i32 into vector<8xi32>
- // CHECK: %[[LD_DESC_3:.*]] = vector.insert {{.*}}, %[[LD_DESC_2]] [3] : i32 into vector<8xi32>
- // CHECK: %[[LD_DESC_4:.*]] = vector.insert {{.*}}, %[[LD_DESC_3]] [4] : i32 into vector<8xi32>
- // CHECK: %[[LD_DESC:.*]] = vector.insert {{.*}}, %[[LD_DESC_4]] [5] : i32 into vector<8xi32>
+ // CHECK: %[[LD_DESC_2:.*]] = vector.insert %[[PREF_BASE_W]], %[[LD_DESC_1]] [2] : i32 into vector<8xi32>
+ // CHECK: %[[LD_DESC_3:.*]] = vector.insert %[[PREF_BASE_H]], %[[LD_DESC_2]] [3] : i32 into vector<8xi32>
+ // CHECK: %[[LD_DESC_4:.*]] = vector.insert %[[OFFSET_ZERO]], %[[LD_DESC_3]] [4] : i32 into vector<8xi32>
+ // CHECK: %[[LD_DESC:.*]] = vector.insert %[[OFFSET_ZERO]], %[[LD_DESC_4]] [5] : i32 into vector<8xi32>
%src_tdesc = xegpu.create_nd_tdesc %srcce : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32,
#xegpu.block_tdesc_attr<memory_space = global>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
//CHECK: %[[LD_DESC_I64:.*]] = vector.bitcast %[[LD_DESC]] : vector<8xi32> to vector<4xi64>
//CHECK: %[[PREF_INTPTR:.*]] = vector.extract %[[LD_DESC_I64]][0] : i64 from vector<4xi64>
- //CHECK: %[[PREF_BASE_W:.*]] = vector.extract %[[LD_DESC]][2] : i32 from vector<8xi32>
- //CHECK: %[[PREF_BASE_H:.*]] = vector.extract %[[LD_DESC]][3] : i32 from vector<8xi32>
- //CHECK: %[[PREF_TILE_W64:.*]] = arith.constant 0 : i64
- //CHECK: %[[PREF_TILE_W:.*]] = arith.trunci %[[PREF_TILE_W64]] : i64 to i32
- //CHECK: %[[PREF_TILE_H64:.*]] = arith.constant 0 : i64
- //CHECK: %[[PREF_TILE_H:.*]] = arith.trunci %[[PREF_TILE_H64]] : i64 to i32
//CHECK: %[[PREF_LLVMPTR:.*]] = llvm.inttoptr %[[PREF_INTPTR]] : i64 to !llvm.ptr<1>
- //CHECK: %[[PREF_SIZEOF_F32:.*]] = arith.constant 4 : i32
- //CHECK: %[[PREF_BASE_ROW_IN_BYTES:.*]] = arith.muli %[[PREF_BASE_W]], %[[PREF_SIZEOF_F32]] : i32
//CHECK: xevm.blockprefetch2d %[[PREF_LLVMPTR]], %[[PREF_BASE_ROW_IN_BYTES]], %[[PREF_BASE_H]],
- //CHECK-SAME: %[[PREF_BASE_ROW_IN_BYTES]], %[[PREF_TILE_W]], %[[PREF_TILE_H]]
+ //CHECK-SAME: %[[PREF_BASE_ROW_IN_BYTES]], %[[OFFSET_ZERO]], %[[OFFSET_ZERO]]
//CHECK-SAME: <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
//CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32, v_blocks = 1 : i32}>
//CHECK-SAME: : (!llvm.ptr<1>, i32, i32, i32, i32, i32)
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
index e5216089692b..ab38f9f2f594 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -253,6 +253,40 @@ module {
// -----
+#map = affine_map<(d0) -> (d0 * 2)>
+#map1 = affine_map<(d0) -> (d0 * 4)>
+module {
+ // CHECK-LABEL: func.func @fuse_tileable_op_no_dps
+ func.func @fuse_tileable_op_no_dps(%arg0: tensor<4x4x4xf32>, %arg1: tensor<4x4x4xf32>) -> tensor<4x4x4xf32> {
+ %0 = "test.tiling_no_dps_op"(%arg0, %arg1) : (tensor<4x4x4xf32>, tensor<4x4x4xf32>) -> tensor<4x4x4xf32>
+ %1 = tensor.empty() : tensor<4x4x4xf32>
+ // CHECK: scf.forall
+ %2 = scf.forall (%arg2, %arg3, %arg4) in (4, 2, 1) shared_outs(%arg5 = %1) -> (tensor<4x4x4xf32>) {
+ %3 = affine.apply #map(%arg3)
+ %4 = affine.apply #map1(%arg4)
+ // CHECK: "test.tiling_no_dps_op"
+ // CHECK: "test.unregistered_op"
+ %extracted_slice = tensor.extract_slice %0[%arg2, %3, %4] [1, 2, 4] [1, 1, 1] : tensor<4x4x4xf32> to tensor<1x2x4xf32>
+ %5 = "test.unregistered_op"(%extracted_slice, %extracted_slice) : (tensor<1x2x4xf32>, tensor<1x2x4xf32>) -> tensor<1x2x4xf32>
+ scf.forall.in_parallel {
+ tensor.parallel_insert_slice %5 into %arg5[%arg2, %3, %4] [1, 2, 4] [1, 1, 1] : tensor<1x2x4xf32> into tensor<4x4x4xf32>
+ }
+ }
+ return %2 : tensor<4x4x4xf32>
+ }
+
+ module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["test.tiling_no_dps_op"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %forall = transform.structured.match ops{["scf.forall"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %fused, %new_containing = transform.structured.fuse_into_containing_op %op into %forall : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+ }
+}
+
+// -----
+
module {
// CHECK-LABEL: func.func @fuse_tileable_op_through_bbarg_inout_nested
// CHECK-SAME: %[[ARG0:[0-9a-z]+]]: tensor<?x?x?xf32>
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 5ff292058ccc..d10651f36371 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -992,6 +992,22 @@ func.func @invalid_store_alignment(%memref: memref<4xi32>, %val: i32) {
// -----
+func.func @invalid_alloc_alignment() {
+ // expected-error @below {{'memref.alloc' op attribute 'alignment' failed to satisfy constraint: 64-bit signless integer attribute whose value is positive and whose value is a power of two > 0}}
+ %0 = memref.alloc() {alignment = 3} : memref<4xf32>
+ return
+}
+
+// -----
+
+func.func @invalid_realloc_alignment(%src: memref<4xf32>) {
+ // expected-error @below {{'memref.realloc' op attribute 'alignment' failed to satisfy constraint: 64-bit signless integer attribute whose value is positive and whose value is a power of two > 0}}
+ %0 = memref.realloc %src {alignment = 7} : memref<4xf32> to memref<8xf32>
+ return
+}
+
+// -----
+
func.func @test_alloc_memref_map_rank_mismatch() {
^bb0:
// expected-error@+1 {{memref layout mismatch between rank and affine map: 2 != 1}}
diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index fdc8e6b5cae6..38d3df31305a 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -219,3 +219,30 @@ func.func @update_unnecessary_computations(%x: memref<i32>) {
// CHECK-LABEL: func.func @update_unnecessary_computations
// CHECK-NOT: acc.atomic.update
// CHECK: acc.atomic.write
+
+// -----
+
+func.func @kernel_environment_canonicalization(%q1: i32, %q2: i32, %q3: i32) {
+ // Empty kernel_environment (no wait) - should be removed
+ acc.kernel_environment {
+ }
+
+ acc.kernel_environment wait({%q1 : i32, %q2 : i32}) {
+ }
+
+ acc.kernel_environment wait {
+ }
+
+ acc.kernel_environment wait({%q3 : i32} [#acc.device_type<nvidia>]) {
+ }
+
+ return
+}
+
+// CHECK-LABEL: func.func @kernel_environment_canonicalization
+// CHECK-SAME: ([[Q1:%.*]]: i32, [[Q2:%.*]]: i32, [[Q3:%.*]]: i32)
+// CHECK-NOT: acc.kernel_environment wait({{.*}}[#acc.device_type<none>])
+// CHECK: acc.wait([[Q1]], [[Q2]] : i32, i32)
+// CHECK: acc.wait{{$}}
+// CHECK: acc.kernel_environment wait({{.*}}[#acc.device_type<nvidia>])
+// CHECK: return
diff --git a/mlir/test/Target/LLVMIR/nvvm/membar.mlir b/mlir/test/Target/LLVMIR/nvvm/membar.mlir
new file mode 100644
index 000000000000..1b794f663b57
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/membar.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
+
+// CHECK-lABEL: @memorybarrier()
+llvm.func @memorybarrier() {
+ // CHECK: call void @llvm.nvvm.membar.cta()
+ nvvm.memory.barrier #nvvm.mem_scope<cta>
+ // CHECK: call void @llvm.nvvm.fence.sc.cluster()
+ nvvm.memory.barrier #nvvm.mem_scope<cluster>
+ // CHECK: call void @llvm.nvvm.membar.gl()
+ nvvm.memory.barrier #nvvm.mem_scope<gpu>
+ // CHECK: call void @llvm.nvvm.membar.sys()
+ nvvm.memory.barrier #nvvm.mem_scope<sys>
+ llvm.return
+}
diff --git a/mlir/test/Target/SPIRV/loop.mlir b/mlir/test/Target/SPIRV/loop.mlir
index 95b87b319ac2..b9a4295a04e7 100644
--- a/mlir/test/Target/SPIRV/loop.mlir
+++ b/mlir/test/Target/SPIRV/loop.mlir
@@ -1,5 +1,10 @@
// RUN: mlir-translate -no-implicit-module -split-input-file -test-spirv-roundtrip %s | FileCheck %s
+// RUN: %if spirv-tools %{ rm -rf %t %}
+// RUN: %if spirv-tools %{ mkdir %t %}
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}
+// RUN: %if spirv-tools %{ spirv-val %t %}
+
// Single loop
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
@@ -62,7 +67,7 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
// Single loop with block arguments
-spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
+spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spirv.GlobalVariable @GV1 bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<10 x f32, stride=4> [0])>, StorageBuffer>
spirv.GlobalVariable @GV2 bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.array<10 x f32, stride=4> [0])>, StorageBuffer>
// CHECK-LABEL: @loop_kernel
diff --git a/mlir/test/Target/SPIRV/phi.mlir b/mlir/test/Target/SPIRV/phi.mlir
index ca635a469eea..92a3387c2f8d 100644
--- a/mlir/test/Target/SPIRV/phi.mlir
+++ b/mlir/test/Target/SPIRV/phi.mlir
@@ -1,5 +1,10 @@
// RUN: mlir-translate -no-implicit-module -split-input-file -test-spirv-roundtrip %s | FileCheck %s
+// RUN: %if spirv-tools %{ rm -rf %t %}
+// RUN: %if spirv-tools %{ mkdir %t %}
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}
+// RUN: %if spirv-tools %{ spirv-val %t %}
+
// Test branch with one block argument
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
@@ -295,15 +300,26 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
%true = spirv.Constant true
%zero = spirv.Constant 0 : i32
%one = spirv.Constant 1 : i32
+ spirv.mlir.selection {
// CHECK: spirv.BranchConditional %{{.*}}, ^[[true1:.*]](%{{.*}}, %{{.*}} : i32, i32), ^[[false1:.*]]
- spirv.BranchConditional %true, ^true1(%zero, %zero: i32, i32), ^false1
+ spirv.BranchConditional %true, ^true1(%zero, %zero: i32, i32), ^false1
// CHECK: [[true1]](%{{.*}}: i32, %{{.*}}: i32)
- ^true1(%arg0: i32, %arg1: i32):
- spirv.Return
+ ^true1(%arg0: i32, %arg1: i32):
+ spirv.Return
// CHECK: [[false1]]:
- ^false1:
+ ^false1:
+ spirv.Return
+ ^merge:
+ spirv.mlir.merge
+ }
+
+ spirv.Return
+ }
+
+ spirv.func @main() -> () "None" {
spirv.Return
}
+ spirv.EntryPoint "GLCompute" @main
}
// -----
@@ -314,15 +330,26 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
%true = spirv.Constant true
%zero = spirv.Constant 0 : i32
%one = spirv.Constant 1 : i32
+ spirv.mlir.selection {
// CHECK: spirv.BranchConditional %{{.*}}, ^[[true1:.*]], ^[[false1:.*]](%{{.*}}, %{{.*}} : i32, i32)
- spirv.BranchConditional %true, ^true1, ^false1(%zero, %zero: i32, i32)
+ spirv.BranchConditional %true, ^true1, ^false1(%zero, %zero: i32, i32)
// CHECK: [[true1]]:
- ^true1:
- spirv.Return
+ ^true1:
+ spirv.Return
// CHECK: [[false1]](%{{.*}}: i32, %{{.*}}: i32):
- ^false1(%arg0: i32, %arg1: i32):
+ ^false1(%arg0: i32, %arg1: i32):
+ spirv.Return
+ ^merge:
+ spirv.mlir.merge
+ }
+
+ spirv.Return
+ }
+
+ spirv.func @main() -> () "None" {
spirv.Return
}
+ spirv.EntryPoint "GLCompute" @main
}
// -----
@@ -333,13 +360,24 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
%true = spirv.Constant true
%zero = spirv.Constant 0 : i32
%one = spirv.Constant 1 : i32
+ spirv.mlir.selection {
// CHECK: spirv.BranchConditional %{{.*}}, ^[[true1:.*]](%{{.*}} : i32), ^[[false1:.*]](%{{.*}}, %{{.*}} : i32, i32)
- spirv.BranchConditional %true, ^true1(%one: i32), ^false1(%zero, %zero: i32, i32)
+ spirv.BranchConditional %true, ^true1(%one: i32), ^false1(%zero, %zero: i32, i32)
// CHECK: [[true1]](%{{.*}}: i32):
- ^true1(%arg0: i32):
- spirv.Return
+ ^true1(%arg0: i32):
+ spirv.Return
// CHECK: [[false1]](%{{.*}}: i32, %{{.*}}: i32):
- ^false1(%arg1: i32, %arg2: i32):
+ ^false1(%arg1: i32, %arg2: i32):
+ spirv.Return
+ ^merge:
+ spirv.mlir.merge
+ }
+
spirv.Return
}
+
+ spirv.func @main() -> () "None" {
+ spirv.Return
+ }
+ spirv.EntryPoint "GLCompute" @main
}
diff --git a/mlir/test/Target/SPIRV/selection.mlir b/mlir/test/Target/SPIRV/selection.mlir
index 44625cc29923..12daf68538d0 100644
--- a/mlir/test/Target/SPIRV/selection.mlir
+++ b/mlir/test/Target/SPIRV/selection.mlir
@@ -1,5 +1,10 @@
// RUN: mlir-translate -no-implicit-module -test-spirv-roundtrip -split-input-file %s | FileCheck %s
+// RUN: %if spirv-tools %{ rm -rf %t %}
+// RUN: %if spirv-tools %{ mkdir %t %}
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}
+// RUN: %if spirv-tools %{ spirv-val %t %}
+
// Selection with both then and else branches
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
@@ -136,19 +141,31 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
// CHECK-NEXT: spirv.Load "Function" %[[VAR]]
%cond = spirv.Load "Function" %var : i1
+ spirv.mlir.selection {
// CHECK: spirv.BranchConditional %1, ^[[THEN1:.+]](%{{.+}} : i32), ^[[ELSE1:.+]](%{{.+}}, %{{.+}} : i32, i32)
- spirv.BranchConditional %cond, ^then1(%one: i32), ^else1(%zero, %zero: i32, i32)
+ spirv.BranchConditional %cond, ^then1(%one: i32), ^else1(%zero, %zero: i32, i32)
// CHECK-NEXT: ^[[THEN1]](%{{.+}}: i32):
// CHECK-NEXT: spirv.Return
- ^then1(%arg0: i32):
- spirv.Return
+ ^then1(%arg0: i32):
+ spirv.Return
// CHECK-NEXT: ^[[ELSE1]](%{{.+}}: i32, %{{.+}}: i32):
// CHECK-NEXT: spirv.Return
- ^else1(%arg1: i32, %arg2: i32):
+ ^else1(%arg1: i32, %arg2: i32):
+ spirv.Return
+ ^merge:
+ spirv.mlir.merge
+ }
+
spirv.Return
}
+
+ spirv.func @main() -> () "None" {
+ spirv.Return
+ }
+ spirv.EntryPoint "GLCompute" @main
+ spirv.ExecutionMode @main "LocalSize", 1, 1, 1
}
// -----
diff --git a/mlir/test/Target/SPIRV/struct.mlir b/mlir/test/Target/SPIRV/struct.mlir
index 4984ee79f903..c4235005b07b 100644
--- a/mlir/test/Target/SPIRV/struct.mlir
+++ b/mlir/test/Target/SPIRV/struct.mlir
@@ -1,6 +1,11 @@
// RUN: mlir-translate -no-implicit-module -test-spirv-roundtrip %s | FileCheck %s
-spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
+// RUN: %if spirv-tools %{ rm -rf %t %}
+// RUN: %if spirv-tools %{ mkdir %t %}
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}
+// RUN: %if spirv-tools %{ spirv-val %t %}
+
+spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, Addresses, Float64, Int64, Linkage], [SPV_KHR_storage_buffer_storage_class]> {
// CHECK: !spirv.ptr<!spirv.struct<(!spirv.array<128 x f32, stride=4> [0])>, Input>
spirv.GlobalVariable @var0 bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.array<128 x f32, stride=4> [0])>, Input>
@@ -16,8 +21,8 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
// CHECK: !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4]), Block>, StorageBuffer>
spirv.GlobalVariable @var4 : !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4]), Block>, StorageBuffer>
- // CHECK: !spirv.ptr<!spirv.struct<(f32 [NonWritable], i32 [NonWritable, NonReadable]), Block>, StorageBuffer>
- spirv.GlobalVariable @var5 : !spirv.ptr<!spirv.struct<(f32 [NonWritable], i32 [NonWritable, NonReadable]), Block>, StorageBuffer>
+ // CHECK: !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4, NonWritable, NonReadable]), Block>, StorageBuffer>
+ spirv.GlobalVariable @var5 : !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4, NonWritable, NonReadable]), Block>, StorageBuffer>
// CHECK: !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4, NonWritable, NonReadable]), Block>, StorageBuffer>
spirv.GlobalVariable @var6 : !spirv.ptr<!spirv.struct<(f32 [0, NonWritable], i32 [4, NonWritable, NonReadable]), Block>, StorageBuffer>
@@ -34,14 +39,14 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> {
// CHECK: !spirv.ptr<!spirv.struct<test_id, (!spirv.array<128 x f32, stride=4> [0])>, Input>
spirv.GlobalVariable @id_var0 : !spirv.ptr<!spirv.struct<test_id, (!spirv.array<128 x f32, stride=4> [0])>, Input>
- // CHECK: !spirv.ptr<!spirv.struct<rec, (!spirv.ptr<!spirv.struct<rec>, StorageBuffer>), Block>, StorageBuffer>
- spirv.GlobalVariable @recursive_simple : !spirv.ptr<!spirv.struct<rec, (!spirv.ptr<!spirv.struct<rec>, StorageBuffer>), Block>, StorageBuffer>
+ // CHECK: !spirv.ptr<!spirv.struct<rec, (!spirv.ptr<!spirv.struct<rec>, StorageBuffer> [0]), Block>, StorageBuffer>
+ spirv.GlobalVariable @recursive_simple : !spirv.ptr<!spirv.struct<rec, (!spirv.ptr<!spirv.struct<rec>, StorageBuffer> [0]), Block>, StorageBuffer>
- // CHECK: !spirv.ptr<!spirv.struct<a, (!spirv.ptr<!spirv.struct<b, (!spirv.ptr<!spirv.struct<a>, Uniform>), Block>, Uniform>), Block>, Uniform>
- spirv.GlobalVariable @recursive_2 : !spirv.ptr<!spirv.struct<a, (!spirv.ptr<!spirv.struct<b, (!spirv.ptr<!spirv.struct<a>, Uniform>), Block>, Uniform>), Block>, Uniform>
+ // CHECK: !spirv.ptr<!spirv.struct<a, (!spirv.ptr<!spirv.struct<b, (!spirv.ptr<!spirv.struct<a>, Uniform> [0]), Block>, Uniform> [0]), Block>, Uniform>
+ spirv.GlobalVariable @recursive_2 : !spirv.ptr<!spirv.struct<a, (!spirv.ptr<!spirv.struct<b, (!spirv.ptr<!spirv.struct<a>, Uniform> [0]), Block>, Uniform> [0]), Block>, Uniform>
- // CHECK: !spirv.ptr<!spirv.struct<axx, (!spirv.ptr<!spirv.struct<bxx, (!spirv.ptr<!spirv.struct<axx>, Uniform>, !spirv.ptr<!spirv.struct<bxx>, Uniform>), Block>, Uniform>), Block>, Uniform>
- spirv.GlobalVariable @recursive_3 : !spirv.ptr<!spirv.struct<axx, (!spirv.ptr<!spirv.struct<bxx, (!spirv.ptr<!spirv.struct<axx>, Uniform>, !spirv.ptr<!spirv.struct<bxx>, Uniform>), Block>, Uniform>), Block>, Uniform>
+ // CHECK: !spirv.ptr<!spirv.struct<axx, (!spirv.ptr<!spirv.struct<bxx, (!spirv.ptr<!spirv.struct<axx>, Uniform> [0], !spirv.ptr<!spirv.struct<bxx>, Uniform> [8]), Block>, Uniform> [0]), Block>, Uniform>
+ spirv.GlobalVariable @recursive_3 : !spirv.ptr<!spirv.struct<axx, (!spirv.ptr<!spirv.struct<bxx, (!spirv.ptr<!spirv.struct<axx>, Uniform> [0], !spirv.ptr<!spirv.struct<bxx>, Uniform> [8]), Block>, Uniform> [0]), Block>, Uniform>
// CHECK: spirv.GlobalVariable @block : !spirv.ptr<!spirv.struct<vert, (vector<4xf32> [BuiltIn=0], f32 [BuiltIn=1]), Block>, Output>
spirv.GlobalVariable @block : !spirv.ptr<!spirv.struct<vert, (vector<4xf32> [BuiltIn=0], f32 [BuiltIn=1]), Block>, Output>
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 4d4ec02546bc..e21cf94f84b6 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1052,6 +1052,32 @@ LogicalResult OpWithRefineTypeInterfaceOp::refineReturnTypes(
}
//===----------------------------------------------------------------------===//
+// TilingNoDpsOp
+//===----------------------------------------------------------------------===//
+
+SmallVector<Range> TilingNoDpsOp::getIterationDomain(OpBuilder &builder) {
+ return {};
+}
+
+SmallVector<utils::IteratorType> TilingNoDpsOp::getLoopIteratorTypes() {
+ return {};
+}
+
+FailureOr<TilingResult>
+TilingNoDpsOp::getTiledImplementation(OpBuilder &builder,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes) {
+ return failure();
+}
+
+LogicalResult TilingNoDpsOp::getResultTilePosition(
+ OpBuilder &builder, unsigned resultNumber, ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes, SmallVector<OpFoldResult> &resultOffsets,
+ SmallVector<OpFoldResult> &resultSizes) {
+ return failure();
+}
+
+//===----------------------------------------------------------------------===//
// OpWithShapedTypeInferTypeAdaptorInterfaceOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index a3430ba49a29..620d950c0d2a 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -30,6 +30,7 @@ include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
include "mlir/Interfaces/ValueBoundsOpInterface.td"
include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td"
@@ -2888,6 +2889,20 @@ def TestLinalgFillOp :
}
//===----------------------------------------------------------------------===//
+// Test TilingInterface.
+//===----------------------------------------------------------------------===//
+
+def Test_TilingNoDpsOp : TEST_Op<"tiling_no_dps_op",
+ [Pure, DeclareOpInterfaceMethods<TilingInterface,
+ ["getIterationDomain",
+ "getLoopIteratorTypes",
+ "getResultTilePosition",
+ "getTiledImplementation"]>]> {
+ let arguments = (ins AnyRankedTensor:$lhs, AnyRankedTensor:$rhs);
+ let results = (outs AnyRankedTensor:$result);
+}
+
+//===----------------------------------------------------------------------===//
// Test NVVM RequiresSM trait.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/mlir-tblgen/constraint-unique.td b/mlir/test/mlir-tblgen/constraint-unique.td
index d51e1a5f43ee..3f2e5cd4bfad 100644
--- a/mlir/test/mlir-tblgen/constraint-unique.td
+++ b/mlir/test/mlir-tblgen/constraint-unique.td
@@ -16,7 +16,7 @@ def AType : Type<ATypePred, "a type">;
def OtherType : Type<ATypePred, "another type">;
def AnAttrPred : CPred<"attrPred($_self, $_op)">;
-def AnAttr : Attr<AnAttrPred, "an attribute">;
+def AnAttr : Attr<AnAttrPred, "an attribute (got {{reformat($_self)}})">;
def OtherAttr : Attr<AnAttrPred, "another attribute">;
def ASuccessorPred : CPred<"successorPred($_self, $_op)">;
@@ -24,7 +24,7 @@ def ASuccessor : Successor<ASuccessorPred, "a successor">;
def OtherSuccessor : Successor<ASuccessorPred, "another successor">;
def ARegionPred : CPred<"regionPred($_self, $_op)">;
-def ARegion : Region<ARegionPred, "a region">;
+def ARegion : Region<ARegionPred, "a region ({{find(foo)}})">;
def OtherRegion : Region<ARegionPred, "another region">;
// OpA and OpB have the same type, attribute, successor, and region constraints.
@@ -71,10 +71,10 @@ def OpC : NS_Op<"op_c"> {
// CHECK: static ::llvm::LogicalResult [[$A_ATTR_CONSTRAINT:__mlir_ods_local_attr_constraint.*]](
// CHECK: if (attr && !((attrPred(attr, *op))))
// CHECK-NEXT: return emitError() << "attribute '" << attrName
-// CHECK-NEXT: << "' failed to satisfy constraint: an attribute";
+// CHECK-NEXT: << "' failed to satisfy constraint: an attribute (got " << reformat(attr) << ")";
/// Test that duplicate attribute constraint was not generated.
-// CHECK-NOT: << "' failed to satisfy constraint: an attribute";
+// CHECK-NOT: << "' failed to satisfy constraint: an attribute
/// Test that a attribute constraint with a different description was generated.
// CHECK: static ::llvm::LogicalResult [[$O_ATTR_CONSTRAINT:__mlir_ods_local_attr_constraint.*]](
@@ -103,7 +103,7 @@ def OpC : NS_Op<"op_c"> {
// CHECK: if (!((regionPred(region, *op)))) {
// CHECK-NEXT: return op->emitOpError("region #") << regionIndex
// CHECK-NEXT: << (regionName.empty() ? " " : " ('" + regionName + "') ")
-// CHECK-NEXT: << "failed to verify constraint: a region";
+// CHECK-NEXT: << "failed to verify constraint: a region (" << find(foo) << ")";
/// Test that duplicate region constraint was not generated.
// CHECK-NOT: << "failed to verify constraint: a region";
diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td
index 549830e06042..a3cb9a41a5b7 100644
--- a/mlir/test/mlir-tblgen/op-attribute.td
+++ b/mlir/test/mlir-tblgen/op-attribute.td
@@ -69,19 +69,19 @@ def AOp : NS_Op<"a_op", []> {
// DEF: ::llvm::LogicalResult AOpAdaptor::verify
// DEF-NEXT: auto tblgen_aAttr = getProperties().aAttr; (void)tblgen_aAttr;
-// DEF-NEXT: if (!tblgen_aAttr) return emitError(loc, "'test.a_op' op ""requires attribute 'aAttr'");
+// DEF-NEXT: if (!tblgen_aAttr) return emitError(loc, "'test.a_op' op requires attribute 'aAttr'");
// DEF-NEXT: auto tblgen_bAttr = getProperties().bAttr; (void)tblgen_bAttr;
// DEF-NEXT: auto tblgen_cAttr = getProperties().cAttr; (void)tblgen_cAttr;
// DEF-NEXT: auto tblgen_dAttr = getProperties().dAttr; (void)tblgen_dAttr;
// DEF: if (tblgen_aAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'aAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test.a_op' op attribute 'aAttr' failed to satisfy constraint: some attribute kind");
// DEF: if (tblgen_bAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test.a_op' op attribute 'bAttr' failed to satisfy constraint: some attribute kind");
// DEF: if (tblgen_cAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test.a_op' op attribute 'cAttr' failed to satisfy constraint: some attribute kind");
// DEF: if (tblgen_dAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'dAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test.a_op' op attribute 'dAttr' failed to satisfy constraint: some attribute kind");
// Test getter methods
// ---
@@ -219,13 +219,13 @@ def AgetOp : Op<Test2_Dialect, "a_get_op", []> {
// DEF: ::llvm::LogicalResult AgetOpAdaptor::verify
// DEF: auto tblgen_aAttr = getProperties().aAttr; (void)tblgen_aAttr;
-// DEF: if (!tblgen_aAttr) return emitError(loc, "'test2.a_get_op' op ""requires attribute 'aAttr'");
+// DEF: if (!tblgen_aAttr) return emitError(loc, "'test2.a_get_op' op requires attribute 'aAttr'");
// DEF: auto tblgen_bAttr = getProperties().bAttr; (void)tblgen_bAttr;
// DEF: auto tblgen_cAttr = getProperties().cAttr; (void)tblgen_cAttr;
// DEF: if (tblgen_bAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op attribute 'bAttr' failed to satisfy constraint: some attribute kind");
// DEF: if (tblgen_cAttr && !((some-condition)))
-// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op attribute 'cAttr' failed to satisfy constraint: some attribute kind");
// Test getter methods
// ---
diff --git a/mlir/test/mlir-tblgen/op-properties-predicates.td b/mlir/test/mlir-tblgen/op-properties-predicates.td
index af09ee7c12f5..7cc963385006 100644
--- a/mlir/test/mlir-tblgen/op-properties-predicates.td
+++ b/mlir/test/mlir-tblgen/op-properties-predicates.td
@@ -74,7 +74,7 @@ def OpWithPredicates : NS_Op<"op_with_predicates"> {
// Note: comprehensive emission of verifiers is tested in verifyINvariantsImpl() below
// CHECK: int64_t tblgen_scalar = this->getScalar();
// CHECK: if (!((tblgen_scalar >= 0)))
-// CHECK: return emitError(loc, "'test.op_with_predicates' op ""property 'scalar' failed to satisfy constraint: non-negative int64_t");
+// CHECK: return emitError(loc, "'test.op_with_predicates' op property 'scalar' failed to satisfy constraint: non-negative int64_t");
// CHECK-LABEL: OpWithPredicates::verifyInvariantsImpl()
// Note: for test readability, we capture [[maybe_unused]] into the variable maybe_unused
diff --git a/mlir/test/mlir-tblgen/predicate.td b/mlir/test/mlir-tblgen/predicate.td
index c1fcd3fa7608..41e041f17121 100644
--- a/mlir/test/mlir-tblgen/predicate.td
+++ b/mlir/test/mlir-tblgen/predicate.td
@@ -55,7 +55,7 @@ def OpF : NS_Op<"op_for_int_min_val", []> {
// CHECK-LABEL: OpFAdaptor::verify
// CHECK: (::llvm::cast<::mlir::IntegerAttr>(tblgen_attr).getInt() >= 10)
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 10"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 10"
def OpFX : NS_Op<"op_for_int_max_val", []> {
let arguments = (ins ConfinedAttr<I32Attr, [IntMaxValue<10>]>:$attr);
@@ -63,7 +63,7 @@ def OpFX : NS_Op<"op_for_int_max_val", []> {
// CHECK-LABEL: OpFXAdaptor::verify
// CHECK: (::llvm::cast<::mlir::IntegerAttr>(tblgen_attr).getInt() <= 10)
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose maximum value is 10"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose maximum value is 10"
def OpG : NS_Op<"op_for_arr_min_count", []> {
let arguments = (ins ConfinedAttr<ArrayAttr, [ArrayMinCount<8>]>:$attr);
@@ -71,7 +71,7 @@ def OpG : NS_Op<"op_for_arr_min_count", []> {
// CHECK-LABEL: OpGAdaptor::verify
// CHECK: (::llvm::cast<::mlir::ArrayAttr>(tblgen_attr).size() >= 8)
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute with at least 8 elements"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: array attribute with at least 8 elements"
def OpH : NS_Op<"op_for_arr_value_at_index", []> {
let arguments = (ins ConfinedAttr<ArrayAttr, [IntArrayNthElemEq<0, 8>]>:$attr);
@@ -79,7 +79,7 @@ def OpH : NS_Op<"op_for_arr_value_at_index", []> {
// CHECK-LABEL: OpHAdaptor::verify
// CHECK: (((::llvm::cast<::mlir::ArrayAttr>(tblgen_attr).size() > 0)) && ((::llvm::cast<::mlir::IntegerAttr>(::llvm::cast<::mlir::ArrayAttr>(tblgen_attr)[0]).getInt() == 8)))))
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be 8"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be 8"
def OpI: NS_Op<"op_for_arr_min_value_at_index", []> {
let arguments = (ins ConfinedAttr<ArrayAttr, [IntArrayNthElemMinValue<0, 8>]>:$attr);
@@ -87,7 +87,7 @@ def OpI: NS_Op<"op_for_arr_min_value_at_index", []> {
// CHECK-LABEL: OpIAdaptor::verify
// CHECK: (((::llvm::cast<::mlir::ArrayAttr>(tblgen_attr).size() > 0)) && ((::llvm::cast<::mlir::IntegerAttr>(::llvm::cast<::mlir::ArrayAttr>(tblgen_attr)[0]).getInt() >= 8)))))
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 8"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 8"
def OpJ: NS_Op<"op_for_arr_max_value_at_index", []> {
let arguments = (ins ConfinedAttr<ArrayAttr, [IntArrayNthElemMaxValue<0, 8>]>:$attr);
@@ -95,7 +95,7 @@ def OpJ: NS_Op<"op_for_arr_max_value_at_index", []> {
// CHECK-LABEL: OpJAdaptor::verify
// CHECK: (((::llvm::cast<::mlir::ArrayAttr>(tblgen_attr).size() > 0)) && ((::llvm::cast<::mlir::IntegerAttr>(::llvm::cast<::mlir::ArrayAttr>(tblgen_attr)[0]).getInt() <= 8)))))
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at most 8"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at most 8"
def OpK: NS_Op<"op_for_arr_in_range_at_index", []> {
let arguments = (ins ConfinedAttr<ArrayAttr, [IntArrayNthElemInRange<0, 4, 8>]>:$attr);
@@ -103,7 +103,7 @@ def OpK: NS_Op<"op_for_arr_in_range_at_index", []> {
// CHECK-LABEL: OpKAdaptor::verify
// CHECK: (((::llvm::cast<::mlir::ArrayAttr>(tblgen_attr).size() > 0)) && ((::llvm::cast<::mlir::IntegerAttr>(::llvm::cast<::mlir::ArrayAttr>(tblgen_attr)[0]).getInt() >= 4)) && ((::llvm::cast<::mlir::IntegerAttr>(::llvm::cast<::mlir::ArrayAttr>(tblgen_attr)[0]).getInt() <= 8)))))
-// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 4 and at most 8"
+// CHECK-NEXT: attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 4 and at most 8"
def OpL: NS_Op<"op_for_TCopVTEtAreSameAt", [
PredOpTrait<"operands indexed at 0, 2, 3 should all have "
@@ -121,7 +121,7 @@ def OpL: NS_Op<"op_for_TCopVTEtAreSameAt", [
// CHECK: ::llvm::all_equal(::llvm::map_range(
// CHECK-SAME: ::mlir::ArrayRef<unsigned>({0, 2, 3}),
// CHECK-SAME: [this](unsigned i) { return getElementTypeOrSelf(this->getOperand(i)); }))
-// CHECK: "failed to verify that operands indexed at 0, 2, 3 should all have the same type"
+// CHECK: failed to verify that operands indexed at 0, 2, 3 should all have the same type"
def OpM : NS_Op<"op_for_AnyTensorOf", []> {
let arguments = (ins TensorOf<[F32, I32]>:$x);
diff --git a/mlir/test/python/dialects/transform.py b/mlir/test/python/dialects/transform.py
index 6c5e4e5505b1..f58442d04fc6 100644
--- a/mlir/test/python/dialects/transform.py
+++ b/mlir/test/python/dialects/transform.py
@@ -51,6 +51,26 @@ def testSequenceOp(module: Module):
transform.AnyOpType.get(),
)
with InsertionPoint(sequence.body):
+ res = transform.CastOp(transform.AnyOpType.get(), sequence.bodyTarget)
+ res2 = transform.cast(transform.any_op_t(), res.result)
+ transform.YieldOp([res2])
+ # CHECK-LABEL: TEST: testSequenceOp
+ # CHECK: transform.sequence
+ # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op):
+ # CHECK: %[[RES:.+]] = cast %[[ARG0]] : !transform.any_op to !transform.any_op
+ # CHECK: %[[RES2:.+]] = cast %[[RES]] : !transform.any_op to !transform.any_op
+ # CHECK: yield %[[RES2]] : !transform.any_op
+ # CHECK: }
+
+
+@run
+def testSequenceOp(module: Module):
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [transform.AnyOpType.get()],
+ transform.AnyOpType.get(),
+ )
+ with InsertionPoint(sequence.body):
transform.YieldOp([sequence.bodyTarget])
# CHECK-LABEL: TEST: testSequenceOp
# CHECK: = transform.sequence -> !transform.any_op failures(propagate) {
@@ -58,6 +78,7 @@ def testSequenceOp(module: Module):
# CHECK: yield %[[ARG0]] : !transform.any_op
# CHECK: }
+
@run
def testNestedSequenceOp(module: Module):
sequence = transform.SequenceOp(
@@ -103,55 +124,65 @@ def testSequenceOpWithExtras(module: Module):
# CHECK-LABEL: TEST: testSequenceOpWithExtras
# CHECK: transform.sequence failures(propagate)
# CHECK: ^{{.*}}(%{{.*}}: !transform.any_op, %{{.*}}: !transform.any_op, %{{.*}}: !transform.op<"foo.bar">):
+ sequence = transform.sequence(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.AnyOpType.get(),
+ [transform.AnyOpType.get(), transform.OperationType.get("foo.bar")],
+ )
+ with InsertionPoint(sequence.body):
+ transform.yield_()
+ # CHECK: transform.sequence failures(propagate)
+ # CHECK: ^{{.*}}(%{{.*}}: !transform.any_op, %{{.*}}: !transform.any_op, %{{.*}}: !transform.op<"foo.bar">):
@run
def testNestedSequenceOpWithExtras(module: Module):
- sequence = transform.SequenceOp(
+ sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
transform.AnyOpType.get(),
[transform.AnyOpType.get(), transform.OperationType.get("foo.bar")],
)
- with InsertionPoint(sequence.body):
- nested = transform.SequenceOp(
+ with InsertionPoint(sequence.body):
+ nested = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
sequence.bodyTarget,
sequence.bodyExtraArgs,
)
- with InsertionPoint(nested.body):
- transform.YieldOp()
- transform.YieldOp()
- # CHECK-LABEL: TEST: testNestedSequenceOpWithExtras
- # CHECK: transform.sequence failures(propagate)
- # CHECK: ^{{.*}}(%[[ARG0:.*]]: !transform.any_op, %[[ARG1:.*]]: !transform.any_op, %[[ARG2:.*]]: !transform.op<"foo.bar">):
- # CHECK: sequence %[[ARG0]], %[[ARG1]], %[[ARG2]] : (!transform.any_op, !transform.any_op, !transform.op<"foo.bar">)
+ with InsertionPoint(nested.body):
+ transform.YieldOp()
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: testNestedSequenceOpWithExtras
+ # CHECK: transform.sequence failures(propagate)
+ # CHECK: ^{{.*}}(%[[ARG0:.*]]: !transform.any_op, %[[ARG1:.*]]: !transform.any_op, %[[ARG2:.*]]: !transform.op<"foo.bar">):
+ # CHECK: sequence %[[ARG0]], %[[ARG1]], %[[ARG2]] : (!transform.any_op, !transform.any_op, !transform.op<"foo.bar">)
@run
def testTransformPDLOps(module: Module):
- withPdl = transform_pdl.WithPDLPatternsOp(transform.AnyOpType.get())
- with InsertionPoint(withPdl.body):
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [transform.AnyOpType.get()],
- withPdl.bodyTarget,
- )
- with InsertionPoint(sequence.body):
- match = transform_pdl.PDLMatchOp(
- transform.AnyOpType.get(), sequence.bodyTarget, "pdl_matcher"
- )
- transform.YieldOp(match)
- # CHECK-LABEL: TEST: testTransformPDLOps
- # CHECK: transform.with_pdl_patterns {
- # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op):
- # CHECK: = sequence %[[ARG0]] : !transform.any_op -> !transform.any_op failures(propagate) {
- # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op):
- # CHECK: %[[RES:.+]] = pdl_match @pdl_matcher in %[[ARG1]]
- # CHECK: yield %[[RES]] : !transform.any_op
- # CHECK: }
- # CHECK: }
+ withPdl = transform_pdl.WithPDLPatternsOp(transform.AnyOpType.get())
+ with InsertionPoint(withPdl.body):
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [transform.AnyOpType.get()],
+ withPdl.bodyTarget,
+ )
+ with InsertionPoint(sequence.body):
+ match = transform_pdl.PDLMatchOp(
+ transform.AnyOpType.get(), sequence.bodyTarget, "pdl_matcher"
+ )
+ transform.YieldOp(match)
+ # CHECK-LABEL: TEST: testTransformPDLOps
+ # CHECK: transform.with_pdl_patterns {
+ # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op):
+ # CHECK: = sequence %[[ARG0]] : !transform.any_op -> !transform.any_op failures(propagate) {
+ # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op):
+ # CHECK: %[[RES:.+]] = pdl_match @pdl_matcher in %[[ARG1]]
+ # CHECK: yield %[[RES]] : !transform.any_op
+ # CHECK: }
+ # CHECK: }
@run
@@ -161,32 +192,53 @@ def testNamedSequenceOp(module: Module):
"__transform_main",
[transform.AnyOpType.get()],
[transform.AnyOpType.get()],
- arg_attrs = [{"transform.consumed": UnitAttr.get()}])
+ arg_attrs=[{"transform.consumed": UnitAttr.get()}],
+ )
with InsertionPoint(named_sequence.body):
transform.YieldOp([named_sequence.bodyTarget])
# CHECK-LABEL: TEST: testNamedSequenceOp
# CHECK: module attributes {transform.with_named_sequence} {
- # CHECK: transform.named_sequence @__transform_main(%[[ARG0:.+]]: !transform.any_op {transform.consumed}) -> !transform.any_op {
- # CHECK: yield %[[ARG0]] : !transform.any_op
+ # CHECK: transform.named_sequence @__transform_main(%[[ARG0:.+]]: !transform.any_op {transform.consumed}) -> !transform.any_op {
+ # CHECK: yield %[[ARG0]] : !transform.any_op
+ named_sequence = transform.named_sequence(
+ "other_seq",
+ [transform.AnyOpType.get()],
+ [transform.AnyOpType.get()],
+ arg_attrs=[{"transform.consumed": UnitAttr.get()}],
+ )
+ with InsertionPoint(named_sequence.body):
+ transform.yield_([named_sequence.bodyTarget])
+ # CHECK: transform.named_sequence @other_seq(%[[ARG1:.+]]: !transform.any_op {transform.consumed}) -> !transform.any_op {
+ # CHECK: yield %[[ARG1]] : !transform.any_op
@run
def testGetParentOp(module: Module):
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate, [], transform.AnyOpType.get()
- )
- with InsertionPoint(sequence.body):
- transform.GetParentOp(
- transform.AnyOpType.get(),
- sequence.bodyTarget,
- isolated_from_above=True,
- nth_parent=2,
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate, [], transform.AnyOpType.get()
)
- transform.YieldOp()
- # CHECK-LABEL: TEST: testGetParentOp
- # CHECK: transform.sequence
- # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op):
- # CHECK: = get_parent_op %[[ARG1]] {isolated_from_above, nth_parent = 2 : i64}
+ with InsertionPoint(sequence.body):
+ transform.GetParentOp(
+ transform.AnyOpType.get(),
+ sequence.bodyTarget,
+ isolated_from_above=True,
+ nth_parent=2,
+ )
+ transform.get_parent_op(
+ transform.AnyOpType.get(),
+ sequence.bodyTarget,
+ isolated_from_above=True,
+ nth_parent=2,
+ allow_empty_results=True,
+ op_name="func.func",
+ deduplicate=True,
+ )
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: testGetParentOp
+ # CHECK: transform.sequence
+ # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op):
+ # CHECK: = get_parent_op %[[ARG1]] {isolated_from_above, nth_parent = 2 : i64}
+ # CHECK: = get_parent_op %[[ARG1]] {allow_empty_results, deduplicate, isolated_from_above, nth_parent = 2 : i64, op_name = "func.func"}
@run
@@ -195,43 +247,58 @@ def testMergeHandlesOp(module: Module):
transform.FailurePropagationMode.Propagate, [], transform.AnyOpType.get()
)
with InsertionPoint(sequence.body):
- transform.MergeHandlesOp([sequence.bodyTarget])
+ res = transform.MergeHandlesOp([sequence.bodyTarget])
+ transform.merge_handles([res.result], deduplicate=True)
transform.YieldOp()
# CHECK-LABEL: TEST: testMergeHandlesOp
# CHECK: transform.sequence
# CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op):
- # CHECK: = merge_handles %[[ARG1]]
+ # CHECK: %[[RES1:.+]] = merge_handles %[[ARG1]] : !transform.any_op
+ # CHECK: = merge_handles deduplicate %[[RES1]] : !transform.any_op
@run
def testApplyPatternsOpCompact(module: Module):
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate, [], transform.AnyOpType.get()
- )
- with InsertionPoint(sequence.body):
- with InsertionPoint(transform.ApplyPatternsOp(sequence.bodyTarget).patterns):
- transform.ApplyCanonicalizationPatternsOp()
- transform.YieldOp()
- # CHECK-LABEL: TEST: testApplyPatternsOpCompact
- # CHECK: apply_patterns to
- # CHECK: transform.apply_patterns.canonicalization
- # CHECK: !transform.any_op
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate, [], transform.AnyOpType.get()
+ )
+ with InsertionPoint(sequence.body):
+ with InsertionPoint(transform.ApplyPatternsOp(sequence.bodyTarget).patterns):
+ transform.ApplyCanonicalizationPatternsOp()
+ with InsertionPoint(
+ transform.apply_patterns(
+ sequence.bodyTarget,
+ apply_cse=True,
+ max_iterations=3,
+ max_num_rewrites=5,
+ ).patterns
+ ):
+ transform.ApplyCanonicalizationPatternsOp()
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: testApplyPatternsOpCompact
+ # CHECK: apply_patterns to
+ # CHECK: transform.apply_patterns.canonicalization
+ # CHECK: } : !transform.any_op
+ # CHECK: apply_patterns to
+ # CHECK: transform.apply_patterns.canonicalization
+ # CHECK: } {apply_cse, max_iterations = 3 : i64, max_num_rewrites = 5 : i64} : !transform.any_op
@run
def testApplyPatternsOpWithType(module: Module):
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate, [],
- transform.OperationType.get('test.dummy')
- )
- with InsertionPoint(sequence.body):
- with InsertionPoint(transform.ApplyPatternsOp(sequence.bodyTarget).patterns):
- transform.ApplyCanonicalizationPatternsOp()
- transform.YieldOp()
- # CHECK-LABEL: TEST: testApplyPatternsOp
- # CHECK: apply_patterns to
- # CHECK: transform.apply_patterns.canonicalization
- # CHECK: !transform.op<"test.dummy">
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.OperationType.get("test.dummy"),
+ )
+ with InsertionPoint(sequence.body):
+ with InsertionPoint(transform.ApplyPatternsOp(sequence.bodyTarget).patterns):
+ transform.ApplyCanonicalizationPatternsOp()
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: testApplyPatternsOp
+ # CHECK: apply_patterns to
+ # CHECK: transform.apply_patterns.canonicalization
+ # CHECK: !transform.op<"test.dummy">
@run
@@ -249,11 +316,13 @@ def testReplicateOp(module: Module):
transform.AnyOpType.get(), sequence.bodyTarget, "second"
)
transform.ReplicateOp(m1, [m2])
+ transform.replicate(m1, [m2])
transform.YieldOp()
# CHECK-LABEL: TEST: testReplicateOp
# CHECK: %[[FIRST:.+]] = pdl_match
# CHECK: %[[SECOND:.+]] = pdl_match
# CHECK: %{{.*}} = replicate num(%[[FIRST]]) %[[SECOND]]
+ # CHECK: %{{.*}} = replicate num(%[[FIRST]]) %[[SECOND]]
# CHECK-LABEL: TEST: testApplyRegisteredPassOp
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 4d9b1b232801..3b10842f2a12 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -17,6 +17,7 @@
#include "OpGenHelpers.h"
#include "mlir/TableGen/Argument.h"
#include "mlir/TableGen/Attribute.h"
+#include "mlir/TableGen/Builder.h"
#include "mlir/TableGen/Class.h"
#include "mlir/TableGen/CodeGenHelpers.h"
#include "mlir/TableGen/Format.h"
@@ -24,16 +25,24 @@
#include "mlir/TableGen/Interfaces.h"
#include "mlir/TableGen/Operator.h"
#include "mlir/TableGen/Property.h"
+#include "mlir/TableGen/Region.h"
#include "mlir/TableGen/SideEffects.h"
+#include "mlir/TableGen/Successor.h"
#include "mlir/TableGen/Trait.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/CodeGenHelpers.h"
@@ -380,9 +389,8 @@ public:
Formatter emitErrorPrefix() const {
return [this](raw_ostream &os) -> raw_ostream & {
if (emitForOp)
- return os << "emitOpError(";
- return os << formatv("emitError(loc, \"'{0}' op \"",
- op.getOperationName());
+ return os << "emitOpError(\"";
+ return os << formatv("emitError(loc, \"'{0}' op ", op.getOperationName());
};
}
@@ -940,7 +948,7 @@ genAttributeVerifier(const OpOrAdaptorHelper &emitHelper, FmtContext &ctx,
// {4}: Attribute/constraint description.
const char *const verifyAttrInline = R"(
if ({0} && !({1}))
- return {2}"attribute '{3}' failed to satisfy constraint: {4}");
+ return {2}attribute '{3}' failed to satisfy constraint: {4}");
)";
// Verify the attribute using a uniqued constraint. Can only be used within
// the context of an op.
@@ -993,10 +1001,11 @@ while (true) {{
(constraintFn = staticVerifierEmitter.getAttrConstraintFn(attr))) {
body << formatv(verifyAttrUnique, *constraintFn, varName, attrName);
} else {
- body << formatv(verifyAttrInline, varName,
- tgfmt(condition, &ctx.withSelf(varName)),
- emitHelper.emitErrorPrefix(), attrName,
- escapeString(attr.getSummary()));
+ body << formatv(
+ verifyAttrInline, varName, tgfmt(condition, &ctx.withSelf(varName)),
+ emitHelper.emitErrorPrefix(), attrName,
+ buildErrorStreamingString(attr.getSummary(), ctx.withSelf(varName),
+ ErrorStreamType::InsideOpError));
}
};
@@ -1017,7 +1026,7 @@ while (true) {{
it.first);
if (metadata.isRequired)
body << formatv(
- "if (!tblgen_{0}) return {1}\"requires attribute '{0}'\");\n",
+ "if (!tblgen_{0}) return {1}requires attribute '{0}'\");\n",
it.first, emitHelper.emitErrorPrefix());
}
} else {
@@ -1099,7 +1108,7 @@ static void genPropertyVerifier(
// {3}: Property description.
const char *const verifyPropertyInline = R"(
if (!({0}))
- return {1}"property '{2}' failed to satisfy constraint: {3}");
+ return {1}property '{2}' failed to satisfy constraint: {3}");
)";
// Verify the property using a uniqued constraint. Can only be used
@@ -1143,9 +1152,12 @@ static void genPropertyVerifier(
if (uniquedFn.has_value() && emitHelper.isEmittingForOp())
body << formatv(verifyPropertyUniqued, *uniquedFn, varName, prop.name);
else
- body << formatv(
- verifyPropertyInline, tgfmt(rawCondition, &ctx.withSelf(varName)),
- emitHelper.emitErrorPrefix(), prop.name, prop.prop.getSummary());
+ body << formatv(verifyPropertyInline,
+ tgfmt(rawCondition, &ctx.withSelf(varName)),
+ emitHelper.emitErrorPrefix(), prop.name,
+ buildErrorStreamingString(
+ prop.prop.getSummary(), ctx.withSelf(varName),
+ ErrorStreamType::InsideOpError));
}
}
diff --git a/mlir/unittests/TableGen/CMakeLists.txt b/mlir/unittests/TableGen/CMakeLists.txt
index c51bda6e8d6c..4d8e508ecdf5 100644
--- a/mlir/unittests/TableGen/CMakeLists.txt
+++ b/mlir/unittests/TableGen/CMakeLists.txt
@@ -25,6 +25,6 @@ target_include_directories(MLIRTableGenTests
)
target_link_libraries(MLIRTableGenTests
- PRIVATE MLIRTableGen MLIRIR
+ PRIVATE LLVMTableGen MLIRTableGen MLIRIR
PUBLIC MLIRTestDialect
)
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index 179a058e9110..4cbe0fe236fc 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -2,37 +2,132 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from pygments.lexer import RegexLexer
+from pygments.lexer import RegexLexer, bygroups, include, using
from pygments.token import *
+import re
class MlirLexer(RegexLexer):
+ """Pygments lexer for MLIR.
+
+ This lexer focuses on accurate tokenization of common MLIR constructs:
+ - SSA values (%%... / %...)
+ - attribute and type aliases (#name =, !name =)
+ - types (builtin and dialect types, parametric types)
+ - attribute dictionaries and nested containers to a reasonable depth
+ - numbers (ints, floats with exponents, hex)
+ - strings with common escapes
+ - line comments (// ...)
+ - block labels (^foo) and operations
+ """
+
name = "MLIR"
aliases = ["mlir"]
filenames = ["*.mlir"]
+ flags = re.MULTILINE
+
+ class VariableList(RegexLexer):
+ """Lexer for lists of SSA variables separated by commas."""
+
+ tokens = {
+ "root": [
+ (r"\s+", Text),
+ (r",", Punctuation),
+ (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable),
+ ]
+ }
+
tokens = {
"root": [
- (r"%[a-zA-Z0-9_]+", Name.Variable),
- (r"@[a-zA-Z_][a-zA-Z0-9_]+", Name.Function),
- (r"\^[a-zA-Z0-9_]+", Name.Label),
- (r"#[a-zA-Z0-9_]+", Name.Constant),
- (r"![a-zA-Z0-9_]+", Keyword.Type),
- (r"[a-zA-Z_][a-zA-Z0-9_]*\.", Name.Entity),
- (r"memref[^.]", Keyword.Type),
- (r"index", Keyword.Type),
- (r"i[0-9]+", Keyword.Type),
- (r"f[0-9]+", Keyword.Type),
+ # Comments
+ (r"//.*?$", Comment.Single),
+ # operation name with assignment: %... = op.name
+ (
+ r"^(\s*)(%[\%_A-Za-z0-9\:#\,\s]+)(=)(\s*)([A-Za-z0-9_\.\$\-]+)\b",
+ bygroups(Text, using(VariableList), Operator, Text, Name.Builtin),
+ ),
+ # operation name without result
+ (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", bygroups(Text, Name.Builtin)),
+ # Attribute alias definition: #name =
+ (
+ r"^(\s*)(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Text, Name.Constant, Text, Operator),
+ ),
+ # Type alias definition: !name =
+ (
+ r"^(\s*)(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Text, Keyword.Type, Text, Operator),
+ ),
+ # SSA values (uses)
+ (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable),
+ # attribute refs, constants and named attributes
+ (r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant),
+ # symbol refs / function-like names
+ (r"@[_A-Za-z][_A-Za-z0-9\$\-\.]*\b", Name.Function),
+ # blocks
+ (r"\^[A-Za-z0-9_\$\.\-]+", Name.Label),
+ # types by exclamation or builtin names
+ (r"![_A-Za-z0-9\$\-\.]+\b", Keyword.Type),
+ # NOTE: please sync changes to corresponding builtin type rule in "angled-type"
+ (r"\b(bf16|f16|f32|f64|f80|f128|index|none|(u|s)?i[0-9]+)\b", Keyword.Type),
+ # container-like dialect types (tensor<...>, memref<...>, vector<...>)
+ (
+ r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
+ bygroups(Keyword.Type, Punctuation),
+ "angled-type",
+ ),
+ # affine constructs
+ (r"\b(affine_map|affine_set)\b", Keyword.Reserved),
+ # common builtin operators / functions inside affine_map
+ (r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Other),
+ # identifiers / bare words
+ (r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other),
+ # numbers: hex, float (with exponent), integer
+ (r"\b0x[0-9A-Fa-f]+\b", Number.Hex),
+ (r"\b([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?\b", Number.Float),
+ (r"\b[0-9]+\b", Number.Integer),
+ # strings
+ (r'"', String.Double, "string"),
+ # punctuation and arrow-like tokens
+ (r"->|>=|<=|\>=|\<=|\->|\=>", Operator),
+ (r"[()\[\]{}<>,.:=]", Punctuation),
+ # operators
+ (r"[-+*/%]", Operator),
+ ],
+ # string state with common escapes
+ "string": [
+ (r'\\[ntr"\\]', String.Escape),
+ (r'[^"\\]+', String.Double),
+ (r'"', String.Double, "#pop"),
+ ],
+ # angled-type content
+ "angled-type": [
+ # match nested '<' and '>'
+ (r"<", Punctuation, "#push"),
+ (r">", Punctuation, "#pop"),
+ # dimensions like 3x or 3x3x... and standalone numbers:
+ # - match numbers that are followed by an 'x' (dimension separator)
+ (r"([0-9]+)(?=(?:x))", Number.Integer),
+ # - match bare numbers (sizes)
(r"[0-9]+", Number.Integer),
- (r"[0-9]*\.[0-9]*", Number.Float),
- (r'"[^"]*"', String.Double),
- (r"affine_map", Keyword.Reserved),
- # TODO: this should be within affine maps only
- (r"\+-\*\/", Operator),
- (r"floordiv", Operator.Word),
- (r"ceildiv", Operator.Word),
- (r"mod", Operator.Word),
- (r"()\[\]<>,{}", Punctuation),
- (r"\/\/.*\n", Comment.Single),
- ]
+ # dynamic dimension '?'
+ (r"\?", Name.Integer),
+ # the 'x' dimension separator (treat as punctuation)
+ (r"x", Punctuation),
+ # element / builtin types inside angle brackets (no word-boundary)
+ # NOTE: please sync changes to corresponding builtin type rule in "root"
+ (
+ r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
+ Keyword.Type,
+ ),
+ # also allow nested container-like types to be recognized
+ (
+ r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
+ bygroups(Keyword.Type, Punctuation),
+ "angled-type",
+ ),
+ # fall back to root rules for anything else
+ include("root"),
+ ],
}
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 2a283bd6fa4e..79e45fd8e082 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -21,7 +21,6 @@ enum class DeviceDebugKind : uint32_t {
Assertion = 1U << 0,
FunctionTracing = 1U << 1,
CommonIssues = 1U << 2,
- AllocationTracker = 1U << 3,
PGODump = 1U << 4,
};
@@ -36,27 +35,6 @@ struct DeviceEnvironmentTy {
uint64_t HardwareParallelism;
};
-struct DeviceMemoryPoolTy {
- void *Ptr;
- uint64_t Size;
-};
-
-struct DeviceMemoryPoolTrackingTy {
- uint64_t NumAllocations;
- uint64_t AllocationTotal;
- uint64_t AllocationMin;
- uint64_t AllocationMax;
-
- void combine(DeviceMemoryPoolTrackingTy &Other) {
- NumAllocations += Other.NumAllocations;
- AllocationTotal += Other.AllocationTotal;
- AllocationMin = AllocationMin > Other.AllocationMin ? Other.AllocationMin
- : AllocationMin;
- AllocationMax = AllocationMax < Other.AllocationMax ? Other.AllocationMax
- : AllocationMax;
- }
-};
-
// NOTE: Please don't change the order of those members as their indices are
// used in the middle end. Always add the new data member at the end.
// Different from KernelEnvironmentTy below, this structure contains members
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 928c6cd7569e..04b394452a44 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3109,17 +3109,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
StackSize = Value;
return Plugin::success();
}
- Error getDeviceHeapSize(uint64_t &Value) override {
- Value = DeviceMemoryPoolSize;
- return Plugin::success();
- }
- Error setDeviceHeapSize(uint64_t Value) override {
- for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
- return Err;
- DeviceMemoryPoolSize = Value;
- return Plugin::success();
- }
Error getDeviceMemorySize(uint64_t &Value) override {
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
if (Pool->isGlobal()) {
@@ -3321,9 +3310,6 @@ private:
/// Reference to the host device.
AMDHostDeviceTy &HostDevice;
- /// The current size of the global device memory pool (managed by us).
- uint64_t DeviceMemoryPoolSize = 1L << 29L /*512MB=*/;
-
/// The current size of the stack that will be used in cases where it could
/// not be statically determined.
uint64_t StackSize = 16 * 1024 /* 16 KB */;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f9dcdea7213f..2135e0608323 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -819,10 +819,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error unloadBinary(DeviceImageTy *Image);
virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
- /// Setup the global device memory pool, if the plugin requires one.
- Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
- uint64_t PoolSize);
-
// Setup the RPC server for this device if needed. This may not run on some
// plugins like the CPU targets. By default, it will not be executed so it is
// up to the target to override this using the shouldSetupRPCServer function.
@@ -1067,6 +1063,16 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Error getDeviceStackSize(uint64_t &V) = 0;
+ virtual bool hasDeviceHeapSize() { return false; }
+ virtual Error getDeviceHeapSize(uint64_t &V) {
+ return Plugin::error(error::ErrorCode::UNSUPPORTED,
+ "%s not supported by platform", __func__);
+ }
+ virtual Error setDeviceHeapSize(uint64_t V) {
+ return Plugin::error(error::ErrorCode::UNSUPPORTED,
+ "%s not supported by platform", __func__);
+ }
+
/// Returns true if current plugin architecture is an APU
/// and unified_shared_memory was not requested by the program.
bool useAutoZeroCopy();
@@ -1159,12 +1165,6 @@ private:
/// plugin can implement the setters as no-op and setting the output
/// value to zero for the getters.
virtual Error setDeviceStackSize(uint64_t V) = 0;
- virtual Error getDeviceHeapSize(uint64_t &V) = 0;
- virtual Error setDeviceHeapSize(uint64_t V) = 0;
-
- /// Indicate whether the device should setup the global device memory pool. If
- /// false is return the value on the device will be uninitialized.
- virtual bool shouldSetupDeviceMemoryPool() const { return true; }
/// Indicate whether or not the device should setup the RPC server. This is
/// only necessary for unhosted targets like the GPU.
@@ -1251,10 +1251,6 @@ protected:
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif
-
-private:
- DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
- DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
};
/// Class implementing common functionalities of offload plugins. Each plugin
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index d7e5a21600ab..ee2ecbcfd309 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -762,13 +762,15 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
return StackSizeEnvarOrErr.takeError();
OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr);
- auto HeapSizeEnvarOrErr = UInt64Envar::create(
- "LIBOMPTARGET_HEAP_SIZE",
- [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
- [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
- if (!HeapSizeEnvarOrErr)
- return HeapSizeEnvarOrErr.takeError();
- OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);
+ if (hasDeviceHeapSize()) {
+ auto HeapSizeEnvarOrErr = UInt64Envar::create(
+ "LIBOMPTARGET_HEAP_SIZE",
+ [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
+ [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
+ if (!HeapSizeEnvarOrErr)
+ return HeapSizeEnvarOrErr.takeError();
+ OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);
+ }
// Update the maximum number of teams and threads after the device
// initialization sets the corresponding hardware limit.
@@ -795,19 +797,6 @@ Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
if (auto Err = callGlobalDestructors(Plugin, *Image))
return Err;
- if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
- GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &ImageDeviceMemoryPoolTracking);
- if (auto Err =
- GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
- consumeError(std::move(Err));
- }
- DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
- }
-
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
if (!ProfOrErr)
@@ -833,22 +822,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
return Err;
LoadedImages.clear();
- if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
- // TODO: Write this by default into a file.
- printf("\n\n|-----------------------\n"
- "| Device memory tracker:\n"
- "|-----------------------\n"
- "| #Allocations: %lu\n"
- "| Byes allocated: %lu\n"
- "| Minimal allocation: %lu\n"
- "| Maximal allocation: %lu\n"
- "|-----------------------\n\n\n",
- DeviceMemoryPoolTracking.NumAllocations,
- DeviceMemoryPoolTracking.AllocationTotal,
- DeviceMemoryPoolTracking.AllocationMin,
- DeviceMemoryPoolTracking.AllocationMax);
- }
-
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
@@ -901,18 +874,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
// Add the image to list.
LoadedImages.push_back(Image);
- // Setup the global device memory pool if needed.
- if (!Plugin.getRecordReplay().isReplaying() &&
- shouldSetupDeviceMemoryPool()) {
- uint64_t HeapSize;
- auto SizeOrErr = getDeviceHeapSize(HeapSize);
- if (SizeOrErr) {
- REPORT("No global device memory pool due to error: %s\n",
- toString(std::move(SizeOrErr)).data());
- } else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize))
- return std::move(Err);
- }
-
if (auto Err = setupRPCServer(Plugin, *Image))
return std::move(Err);
@@ -936,51 +897,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
return Image;
}
-Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
- DeviceImageTy &Image,
- uint64_t PoolSize) {
- // Free the old pool, if any.
- if (DeviceMemoryPool.Ptr) {
- if (auto Err = dataDelete(DeviceMemoryPool.Ptr,
- TargetAllocTy::TARGET_ALLOC_DEVICE))
- return Err;
- }
-
- DeviceMemoryPool.Size = PoolSize;
- auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr,
- TargetAllocTy::TARGET_ALLOC_DEVICE);
- if (AllocOrErr) {
- DeviceMemoryPool.Ptr = *AllocOrErr;
- } else {
- auto Err = AllocOrErr.takeError();
- REPORT("Failure to allocate device memory for global memory pool: %s\n",
- toString(std::move(Err)).data());
- DeviceMemoryPool.Ptr = nullptr;
- DeviceMemoryPool.Size = 0;
- }
-
- // Create the metainfo of the device environment global.
- GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- if (!GHandler.isSymbolInImage(*this, Image,
- "__omp_rtl_device_memory_pool_tracker")) {
- DP("Skip the memory pool as there is no tracker symbol in the image.");
- return Error::success();
- }
-
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &DeviceMemoryPoolTracking);
- if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal))
- return Err;
-
- // Create the metainfo of the device environment global.
- GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool",
- sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool);
-
- // Write device environment values to the device.
- return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
-}
-
Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
DeviceImageTy &Image) {
// The plugin either does not need an RPC server or it is unavailable.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index a9adcc397fb7..45e580e7e0cd 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1235,11 +1235,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Info;
}
- virtual bool shouldSetupDeviceMemoryPool() const override {
- /// We use the CUDA malloc for now.
- return false;
- }
-
/// Getters and setters for stack and heap sizes.
Error getDeviceStackSize(uint64_t &Value) override {
return getCtxLimit(CU_LIMIT_STACK_SIZE, Value);
@@ -1247,6 +1242,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
Error setDeviceStackSize(uint64_t Value) override {
return setCtxLimit(CU_LIMIT_STACK_SIZE, Value);
}
+ bool hasDeviceHeapSize() override { return true; }
Error getDeviceHeapSize(uint64_t &Value) override {
return getCtxLimit(CU_LIMIT_MALLOC_HEAP_SIZE, Value);
}
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index eb4ecac9907a..48de1fefa29d 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -380,9 +380,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
return Info;
}
- /// This plugin should not setup the device environment or memory pool.
- virtual bool shouldSetupDeviceMemoryPool() const override { return false; };
-
/// Getters and setters for stack size and heap size not relevant.
Error getDeviceStackSize(uint64_t &Value) override {
Value = 0;
@@ -391,11 +388,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error setDeviceStackSize(uint64_t Value) override {
return Plugin::success();
}
- Error getDeviceHeapSize(uint64_t &Value) override {
- Value = 0;
- return Plugin::success();
- }
- Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); }
private:
/// Grid values for Generic ELF64 plugins.
diff --git a/offload/test/offloading/malloc_parallel.c b/offload/test/libc/malloc_parallel.c
index 076a7ba397a3..076a7ba397a3 100644
--- a/offload/test/offloading/malloc_parallel.c
+++ b/offload/test/libc/malloc_parallel.c
diff --git a/offload/test/mapping/lambda_mapping.cpp b/offload/test/mapping/lambda_mapping.cpp
index 63b1719fbbc3..8e640b7fff3a 100644
--- a/offload/test/mapping/lambda_mapping.cpp
+++ b/offload/test/mapping/lambda_mapping.cpp
@@ -4,6 +4,8 @@
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// RUN: %libomptarget-compileoptxx-run-and-check-generic
+// REQUIRES: libc
+
#include <iostream>
template <typename LOOP_BODY>
diff --git a/offload/test/offloading/interop-print.c b/offload/test/offloading/interop-print.c
index a3864209e17b..f7b37d992f17 100644
--- a/offload/test/offloading/interop-print.c
+++ b/offload/test/offloading/interop-print.c
@@ -8,6 +8,7 @@
// REQUIRES: gpu
// XFAIL: nvptx64-nvidia-cuda
+// XFAIL: nvptx64-nvidia-cuda-LTO
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/offloading/malloc.c b/offload/test/offloading/malloc.c
index 7b98e1f1110e..04e72561d312 100644
--- a/offload/test/offloading/malloc.c
+++ b/offload/test/offloading/malloc.c
@@ -10,7 +10,7 @@ int main() {
int Threads = 64;
int Teams = 10;
- // Allocate ~55MB on the device.
+ // Allocate ~160 KiB on the device.
#pragma omp target map(from : DP)
DP = (long unsigned *)malloc(sizeof(long unsigned) * N * Threads * Teams);
diff --git a/openmp/device/include/Allocator.h b/openmp/device/include/Allocator.h
index dc4d029ed75f..507ec6327126 100644
--- a/openmp/device/include/Allocator.h
+++ b/openmp/device/include/Allocator.h
@@ -14,18 +14,12 @@
#include "DeviceTypes.h"
-// Forward declaration.
-struct KernelEnvironmentTy;
-
namespace ompx {
namespace allocator {
static uint64_t constexpr ALIGNMENT = 16;
-/// Initialize the allocator according to \p KernelEnvironment
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
-
/// Allocate \p Size bytes.
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
alloc(uint64_t Size);
diff --git a/openmp/device/src/Allocator.cpp b/openmp/device/src/Allocator.cpp
index aac2a6005158..34c945c979ff 100644
--- a/openmp/device/src/Allocator.cpp
+++ b/openmp/device/src/Allocator.cpp
@@ -18,42 +18,36 @@
#include "Synchronization.h"
using namespace ompx;
+using namespace allocator;
+
+// Provide a default implementation of malloc / free for AMDGPU platforms built
+// without 'libc' support.
+extern "C" {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
+[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
+#else
+[[gnu::leaf]] void *malloc(size_t Size);
+[[gnu::leaf]] void free(void *Ptr);
+#endif
+}
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility(
- "protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
- __omp_rtl_device_memory_pool_tracker;
+static constexpr uint64_t MEMORY_SIZE = /* 1 MiB */ 1024 * 1024;
+alignas(ALIGNMENT) static uint8_t Memory[MEMORY_SIZE] = {0};
-/// Stateless bump allocator that uses the __omp_rtl_device_memory_pool
-/// directly.
+// Fallback bump pointer interface for platforms without a functioning
+// allocator.
struct BumpAllocatorTy final {
+ uint64_t Offset = 0;
void *alloc(uint64_t Size) {
Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT));
- if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) {
- atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1,
- atomic::seq_cst);
- atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size,
- atomic::seq_cst);
- atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size,
- atomic::seq_cst);
- atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size,
- atomic::seq_cst);
- }
-
- uint64_t *Data =
- reinterpret_cast<uint64_t *>(&__omp_rtl_device_memory_pool.Ptr);
- uint64_t End =
- reinterpret_cast<uint64_t>(Data) + __omp_rtl_device_memory_pool.Size;
-
- uint64_t OldData = atomic::add(Data, Size, atomic::seq_cst);
- if (OldData + Size > End)
+ uint64_t OldData = atomic::add(&Offset, Size, atomic::seq_cst);
+ if (OldData + Size >= MEMORY_SIZE)
__builtin_trap();
- return reinterpret_cast<void *>(OldData);
+ return &Memory[OldData];
}
void free(void *) {}
@@ -65,13 +59,20 @@ BumpAllocatorTy BumpAllocator;
///
///{
-void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
- // TODO: Check KernelEnvironment for an allocator choice as soon as we have
- // more than one.
+void *allocator::alloc(uint64_t Size) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ return BumpAllocator.alloc(Size);
+#else
+ return ::malloc(Size);
+#endif
}
-void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); }
-
-void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); }
+void allocator::free(void *Ptr) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ BumpAllocator.free(Ptr);
+#else
+ ::free(Ptr);
+#endif
+}
///}
diff --git a/openmp/device/src/Kernel.cpp b/openmp/device/src/Kernel.cpp
index 8c2828b27041..05af35d242ac 100644
--- a/openmp/device/src/Kernel.cpp
+++ b/openmp/device/src/Kernel.cpp
@@ -41,7 +41,6 @@ inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
- allocator::init(IsSPMD, KernelEnvironment);
workshare::init(IsSPMD);
}
diff --git a/openmp/device/src/Misc.cpp b/openmp/device/src/Misc.cpp
index 563f674d166e..a53fb4302fdb 100644
--- a/openmp/device/src/Misc.cpp
+++ b/openmp/device/src/Misc.cpp
@@ -100,7 +100,7 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
case omp_const_mem_alloc:
case omp_high_bw_mem_alloc:
case omp_low_lat_mem_alloc:
- return malloc(size);
+ return ompx::allocator::alloc(size);
default:
return nullptr;
}
@@ -113,7 +113,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
case omp_const_mem_alloc:
case omp_high_bw_mem_alloc:
case omp_low_lat_mem_alloc:
- free(ptr);
+ ompx::allocator::free(ptr);
return;
case omp_null_allocator:
default:
diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp
index 475395102f47..9f38cf26f8c6 100644
--- a/openmp/device/src/State.cpp
+++ b/openmp/device/src/State.cpp
@@ -44,26 +44,6 @@ using namespace ompx;
namespace {
-/// Fallback implementations are missing to trigger a link time error.
-/// Implementations for new devices, including the host, should go into a
-/// dedicated begin/end declare variant.
-///
-///{
-extern "C" {
-#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
-
-[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
-[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
-
-#else
-
-[[gnu::weak, gnu::leaf]] void *malloc(size_t Size);
-[[gnu::weak, gnu::leaf]] void free(void *Ptr);
-
-#endif
-}
-///}
-
/// A "smart" stack in shared memory.
///
/// The stack exposes a malloc/free interface but works like a stack internally.
@@ -171,13 +151,13 @@ void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) {
}
void *memory::allocGlobal(uint64_t Bytes, const char *Reason) {
- void *Ptr = malloc(Bytes);
+ void *Ptr = allocator::alloc(Bytes);
if (config::isDebugMode(DeviceDebugKind::CommonIssues) && Ptr == nullptr)
printf("nullptr returned by malloc!\n");
return Ptr;
}
-void memory::freeGlobal(void *Ptr, const char *Reason) { free(Ptr); }
+void memory::freeGlobal(void *Ptr, const char *Reason) { allocator::free(Ptr); }
///}
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index cd78a5ba88e2..1b6f30ae73a3 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1521,5 +1521,4 @@ debugging features are supported.
* Enable debugging assertions in the device. ``0x01``
* Enable diagnosing common problems during offloading . ``0x4``
- * Enable device malloc statistics (amdgpu only). ``0x8``
* Dump device PGO counters (only if PGO on GPU is enabled). ``0x10``
diff --git a/orc-rt/include/orc-rt/Endian.h b/orc-rt/include/orc-rt/Endian.h
new file mode 100644
index 000000000000..538eb3f23d0f
--- /dev/null
+++ b/orc-rt/include/orc-rt/Endian.h
@@ -0,0 +1,44 @@
+//===----- Endian.h - Endianness helpers for the ORC runtime ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Endianness helper functions for the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ORC_RT_ENDIAN_H
+#define ORC_RT_ENDIAN_H
+
+#include "bit.h"
+#include <cstring>
+#include <type_traits>
+
+namespace orc_rt {
+
+/// Read a value with the given endianness from memory.
+template <typename T>
+[[nodiscard]] inline std::enable_if_t<std::is_integral_v<T>, T>
+endian_read(const void *Src, orc_rt::endian E) noexcept {
+ T Val;
+ memcpy(&Val, Src, sizeof(T));
+ if (E != orc_rt::endian::native)
+ Val = orc_rt::byteswap(Val);
+ return Val;
+}
+
+/// Write a value with the given endianness to memory.
+template <typename T>
+inline std::enable_if_t<std::is_integral_v<T>>
+endian_write(void *Dst, T Val, orc_rt::endian E) noexcept {
+ if (E != orc_rt::endian::native)
+ Val = orc_rt::byteswap(Val);
+ memcpy(Dst, &Val, sizeof(T));
+}
+
+} // namespace orc_rt
+
+#endif // ORC_RT_ENDIAN_H
diff --git a/orc-rt/unittests/CMakeLists.txt b/orc-rt/unittests/CMakeLists.txt
index 2928fc862b8d..7b943e803944 100644
--- a/orc-rt/unittests/CMakeLists.txt
+++ b/orc-rt/unittests/CMakeLists.txt
@@ -15,6 +15,7 @@ add_orc_rt_unittest(CoreTests
AllocActionTest.cpp
BitmaskEnumTest.cpp
CallableTraitsHelperTest.cpp
+ EndianTest.cpp
ErrorTest.cpp
ExecutorAddressTest.cpp
IntervalMapTest.cpp
diff --git a/orc-rt/unittests/EndianTest.cpp b/orc-rt/unittests/EndianTest.cpp
new file mode 100644
index 000000000000..74dc6ef620bc
--- /dev/null
+++ b/orc-rt/unittests/EndianTest.cpp
@@ -0,0 +1,100 @@
+//===- EndianTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Tests for orc-rt's Endian.h APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "orc-rt/Endian.h"
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <limits>
+
+using namespace orc_rt;
+
+template <typename T> static void endianRead(T Value, endian E) {
+ char Buffer[sizeof(T)];
+ memcpy(Buffer, &Value, sizeof(T));
+
+ if (E != endian::native)
+ std::reverse(Buffer, Buffer + sizeof(T));
+
+ T NewVal = endian_read<T>(Buffer, E);
+ EXPECT_EQ(NewVal, Value);
+}
+
+template <typename T> static void endianWrite(T Value, endian E) {
+ char Buffer[sizeof(T)];
+
+ endian_write(Buffer, Value, E);
+
+ if (E != endian::native)
+ std::reverse(Buffer, Buffer + sizeof(T));
+
+ T NewVal;
+ memcpy(&NewVal, Buffer, sizeof(T));
+
+ EXPECT_EQ(NewVal, Value);
+}
+
+template <typename T> static void endianReadAndWrite(T Value, endian E) {
+ endianRead(Value, E);
+ endianWrite(Value, E);
+}
+
+template <typename T> static void bothEndiansReadAndWrite(T Value) {
+ endianReadAndWrite(Value, endian::little);
+ endianReadAndWrite(Value, endian::big);
+}
+
+// Rotate the given bit pattern through all valid rotations for T, testing that
+// the given operation works for the given pattern.
+template <typename Op, typename T>
+void forAllRotatedValues(Op O, T InitialValue) {
+ T V = InitialValue;
+ for (size_t I = 0; I != CHAR_BIT * sizeof(T); ++I) {
+ O(V);
+ V = llvm::rotl(V, 1);
+ }
+}
+
+template <typename Op, typename T>
+void forAllShiftedValues(Op O, T InitialValue) {
+ T V = InitialValue;
+ constexpr T TopValueBit = 1 << (std::numeric_limits<T>::digits - 1);
+ for (size_t I = 0; I != CHAR_BIT * sizeof(T); ++I) {
+ O(V);
+ if (V & TopValueBit)
+ break;
+ V << 1;
+ }
+}
+
+TEST(EndianTest, ReadWrite) {
+ bothEndiansReadAndWrite<uint8_t>(0);
+ bothEndiansReadAndWrite<uint8_t>(0xff);
+ forAllRotatedValues(bothEndiansReadAndWrite<uint8_t>, uint8_t(1));
+ forAllRotatedValues(bothEndiansReadAndWrite<uint8_t>, uint8_t(0x5A));
+
+ bothEndiansReadAndWrite<uint16_t>(0);
+ bothEndiansReadAndWrite<uint16_t>(0xffff);
+ forAllRotatedValues(bothEndiansReadAndWrite<uint16_t>, uint16_t(1));
+ forAllRotatedValues(bothEndiansReadAndWrite<uint16_t>, uint16_t(0x5A5A));
+
+ bothEndiansReadAndWrite<uint32_t>(0);
+ bothEndiansReadAndWrite<uint32_t>(0xffffffff);
+ forAllRotatedValues(bothEndiansReadAndWrite<uint32_t>, uint32_t(1));
+ forAllRotatedValues(bothEndiansReadAndWrite<uint32_t>, uint32_t(0x5A5A5A5A));
+
+ bothEndiansReadAndWrite<uint64_t>(0);
+ bothEndiansReadAndWrite<uint64_t>(0xffffffffffffffff);
+ forAllRotatedValues(bothEndiansReadAndWrite<uint64_t>, uint64_t(1));
+ forAllRotatedValues(bothEndiansReadAndWrite<uint64_t>,
+ uint64_t(0x5A5A5A5A5A5A5A5A));
+}
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index d3280a5867de..cc756da426e6 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -233,6 +233,20 @@ endif()
option(LLVM_INCLUDE_TESTS "Generate build targets for the runtimes unit tests." ON)
option(LLVM_INCLUDE_DOCS "Generate build targets for the runtimes documentation." ON)
option(LLVM_ENABLE_SPHINX "Use Sphinx to generate the runtimes documentation." OFF)
+option(RUNTIMES_EXECUTE_ONLY_CODE "Compile runtime libraries as execute-only." OFF)
+
+if (RUNTIMES_EXECUTE_ONLY_CODE)
+ # If a target doesn't support or recognise -mexecute-only, Clang will simply ignore the flag.
+ # We can check for this case using -Werror=unused-command-line-argument.
+ check_c_compiler_flag("-mexecute-only -Werror=unused-command-line-argument" C_SUPPORTS_MEXECUTE_ONLY)
+ if (NOT C_SUPPORTS_MEXECUTE_ONLY)
+ message(FATAL_ERROR "RUNTIMES_EXECUTE_ONLY_CODE was turned on, but the target '${LLVM_TARGET_TRIPLE}'"
+ " doesn't support the -mexecute-only flag")
+ endif()
+
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mexecute-only")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mexecute-only")
+endif()
# Use libtool instead of ar if you are both on an Apple host, and targeting Apple.
if(CMAKE_HOST_APPLE AND APPLE)
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index bb35815a18d7..3e7719c0d03c 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2317,6 +2317,7 @@ llvm_target_lib_list = [lib for lib in [
"lib/Target/BPF/BPFGenInstrInfo.inc": ["-gen-instr-info"],
"lib/Target/BPF/BPFGenRegisterInfo.inc": ["-gen-register-info"],
"lib/Target/BPF/BPFGenSubtargetInfo.inc": ["-gen-subtarget"],
+ "lib/Target/BPF/BPFGenSDNodeInfo.inc": ["-gen-sd-node-info"],
},
},
{
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 943ae102f2f2..a4ea627fb3d1 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -8,6 +8,7 @@
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+load("@rules_python//python:defs.bzl", "py_binary")
load("//llvm:targets.bzl", "llvm_targets")
load(
":build_defs.bzl",
@@ -357,6 +358,34 @@ cc_library(
],
)
+td_library(
+ name = "AlignmentAttrInterfaceTdFiles",
+ srcs = ["include/mlir/Interfaces/AlignmentAttrInterface.td"],
+ includes = ["include"],
+ deps = [":OpBaseTdFiles"],
+)
+
+gentbl_cc_library(
+ name = "AlignmentAttrInterfaceIncGen",
+ tbl_outs = {
+ "include/mlir/Interfaces/AlignmentAttrInterface.h.inc": ["-gen-op-interface-decls"],
+ "include/mlir/Interfaces/AlignmentAttrInterface.cpp.inc": ["-gen-op-interface-defs"],
+ },
+ tblgen = ":mlir-tblgen",
+ td_file = "include/mlir/Interfaces/AlignmentAttrInterface.td",
+ deps = [":OpBaseTdFiles"],
+)
+
+cc_library(
+ name = "AlignmentAttrInterface",
+ hdrs = ["include/mlir/Interfaces/AlignmentAttrInterface.h"],
+ deps = [
+ ":AlignmentAttrInterfaceIncGen",
+ ":IR",
+ "//llvm:Support",
+ ],
+)
+
cc_library(
name = "IR",
srcs = glob([
@@ -3816,6 +3845,17 @@ cc_library(
],
)
+td_library(
+ name = "XeGPUTransformOpsTdFiles",
+ srcs = [
+ "include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td",
+ ],
+ includes = ["include"],
+ deps = [
+ ":TransformDialectTdFiles",
+ ],
+)
+
gentbl_cc_library(
name = "XeGPUTransformOpsIncGen",
tbl_outs = {
@@ -3826,6 +3866,7 @@ gentbl_cc_library(
td_file = "include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td",
deps = [
":TransformDialectTdFiles",
+ ":XeGPUTransformOpsTdFiles",
],
)
@@ -4877,6 +4918,7 @@ cc_library(
includes = ["include"],
deps = [
":AffineDialect",
+ ":AlignmentAttrInterface",
":Analysis",
":ArithDialect",
":ArithUtils",
@@ -6836,6 +6878,7 @@ td_library(
srcs = glob(["include/mlir/Dialect/SPIRV/IR/*.td"]),
includes = ["include"],
deps = [
+ ":AlignmentAttrInterfaceTdFiles",
":BuiltinDialectTdFiles",
":CallInterfacesTdFiles",
":ControlFlowInterfacesTdFiles",
@@ -6950,6 +6993,7 @@ cc_library(
]),
includes = ["include"],
deps = [
+ ":AlignmentAttrInterface",
":BytecodeOpInterface",
":CallOpInterfaces",
":CommonFolders",
@@ -11327,6 +11371,7 @@ td_library(
],
includes = ["include"],
deps = [
+ ":AlignmentAttrInterfaceTdFiles",
":ControlFlowInterfacesTdFiles",
":DestinationStyleOpInterfaceTdFiles",
":IndexingMapOpInterfaceTdFiles",
@@ -12907,6 +12952,7 @@ td_library(
],
includes = ["include"],
deps = [
+ ":AlignmentAttrInterfaceTdFiles",
":ArithOpsTdFiles",
":CastInterfacesTdFiles",
":ControlFlowInterfacesTdFiles",
@@ -12988,6 +13034,7 @@ cc_library(
],
includes = ["include"],
deps = [
+ ":AlignmentAttrInterface",
":AllocationOpInterface",
":ArithDialect",
":ArithUtils",
@@ -14382,3 +14429,8 @@ cc_library(
"//llvm:Support",
],
)
+
+py_binary(
+ name = "generate-test-checks",
+ srcs = ["utils/generate-test-checks.py"],
+)
diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
index be18ba5b4680..9658e4387a6c 100644
--- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
@@ -1541,6 +1541,7 @@ filegroup(
":TransformSMTExtensionOpsPyGen",
":VectorTransformEnumPyGen",
":VectorTransformOpsPyGen",
+ ":XeGPUTransformOpsPyGen",
],
)
@@ -1626,3 +1627,21 @@ filegroup(
":VectorOpsPyGen",
],
)
+
+##---------------------------------------------------------------------------##
+# XEGPU dialect.
+##---------------------------------------------------------------------------##
+
+gentbl_filegroup(
+ name = "XeGPUTransformOpsPyGen",
+ tbl_outs = {"mlir/dialects/_xegpu_transform_ops_gen.py": [
+ "-gen-python-op-bindings",
+ "-bind-dialect=transform",
+ "-dialect-extension=xegpu_transform",
+ ]},
+ tblgen = "//mlir:mlir-tblgen",
+ td_file = "mlir/dialects/XeGPUTransformOps.td",
+ deps = [
+ "//mlir:XeGPUTransformOpsTdFiles",
+ ],
+)