add release noteusers/MaskRay/spr/sema-allow-wno-main-to-suppress-the-arg-wrong-error

Created using spr 1.3.5-bogner
author: Fangrui Song <i@maskray.me> 2024-03-16 14:24:50 -0700
committer: Fangrui Song <i@maskray.me> 2024-03-16 14:24:50 -0700
commit: 3aa7d5b9726ba51fc943ad80c11369264806fcce (patch)
tree: 32beb24493d323a3264e32ae5733011dd7530f1a
parent: 0fd3675447f41b033472f9269648e8e735bc543a (diff)
parent: 84b5178124e47f6019b56c04abcfb978a94b1c3c (diff)
182 files changed, 3788 insertions, 808 deletions
diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml
index 3d0c23917bd4..1d1fa2483b65 100644
--- a/.github/workflows/pr-code-format.yml
+++ b/.github/workflows/pr-code-format.yml
@@ -53,7 +53,7 @@ jobs:
       - name: Install clang-format
         uses: aminya/setup-cpp@v1
         with:
-          clangformat: 17.0.1
+          clangformat: 18.1.1
 
       - name: Setup Python env
         uses: actions/setup-python@v4
diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
index 42dd612eeeec..656b62c9a1f4 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
@@ -72,6 +72,22 @@ TEST_F(ExtractVariableTest, Test) {
     )cpp";
   EXPECT_UNAVAILABLE(NoCrashCasesC);
 
+  ExtraArgs = {"-xc"};
+  const char *NoCrashDesignator = R"cpp(
+    struct A {
+      struct {
+        int x;
+      };
+    };
+    struct B {
+      int y;
+    };
+    void foo(struct B *b) {
+      struct A a = {.x=b[[->]]y};
+    }
+  )cpp";
+  EXPECT_AVAILABLE(NoCrashDesignator);
+
   ExtraArgs = {"-xobjective-c"};
   const char *AvailableObjC = R"cpp(
     __attribute__((objc_root_class))
diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index db7430b3344c..d5546e20873b 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -11,6 +11,7 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "
 
 set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "")
 set(LLVM_ENABLE_DIA_SDK OFF CACHE BOOL "")
+set(LLVM_ENABLE_FATLTO ON CACHE BOOL "")
 set(LLVM_ENABLE_HTTPLIB ON CACHE BOOL "")
 set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "")
 set(LLVM_ENABLE_LIBEDIT OFF CACHE BOOL "")
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 623a4b3c18bb..0982a18e4629 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -47,6 +47,12 @@ C++ Specific Potentially Breaking Changes
 
 ABI Changes in This Version
 ---------------------------
+- Fixed Microsoft name mangling of implicitly defined variables used for thread
+  safe static initialization of static local variables. This change resolves
+  incompatibilities with code compiled by MSVC but might introduce
+  incompatibilities with code compiled by earlier versions of Clang when an
+  inline member function that contains a static local variable with a dynamic
+  initializer is declared with ``__declspec(dllimport)``. (#GH83616).
 
 AST Dumping Potentially Breaking Changes
 ----------------------------------------
@@ -244,6 +250,9 @@ Improvements to Clang's diagnostics
   such as attempting to call ``free`` on an unallocated object. Fixes
   `#79443 <https://github.com/llvm/llvm-project/issues/79443>`_.
 
+- The ``XXX parameter of 'main' must be of type`` error can now be disabled via ``-Wno-main``.
+  `#85494 <https://github.com/llvm/llvm-project/pull/85494>`_.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -288,6 +297,9 @@ Bug Fixes in This Version
   by the C standard. This significantly improves codegen of `*` and `/` especially.
   Fixes (`#31205 <https://github.com/llvm/llvm-project/issues/31205>`_).
 
+- Fixes an assertion failure on invalid code when trying to define member
+  functions in lambdas.
+
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -381,6 +393,8 @@ Bug Fixes to C++ Support
   Fixes (#GH80997)
 - Fix an issue where missing set friend declaration in template class instantiation.
   Fixes (#GH84368).
+- Fixed a crash while checking constraints of a trailing requires-expression of a lambda, that the
+  expression references to an entity declared outside of the lambda. (#GH64808)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/Availability.h b/clang/include/clang/AST/Availability.h
index ae3acbeffe7f..5cfbaf0cdfbd 100644
--- a/clang/include/clang/AST/Availability.h
+++ b/clang/include/clang/AST/Availability.h
@@ -75,6 +75,9 @@ struct AvailabilityInfo {
   /// Determine if this AvailabilityInfo represents the default availability.
   bool isDefault() const { return *this == AvailabilityInfo(); }
 
+  /// Check if the symbol has been obsoleted.
+  bool isObsoleted() const { return !Obsoleted.empty(); }
+
   /// Check if the symbol is unconditionally deprecated.
   ///
   /// i.e. \code __attribute__((deprecated)) \endcode
diff --git a/clang/include/clang/Basic/AllDiagnostics.h b/clang/include/clang/Basic/AllDiagnostics.h
index cc6aa631534a..e64634cc138f 100644
--- a/clang/include/clang/Basic/AllDiagnostics.h
+++ b/clang/include/clang/Basic/AllDiagnostics.h
@@ -20,6 +20,7 @@
 #include "clang/Basic/DiagnosticCrossTU.h"
 #include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Basic/DiagnosticFrontend.h"
+#include "clang/Basic/DiagnosticInstallAPI.h"
 #include "clang/Basic/DiagnosticLex.h"
 #include "clang/Basic/DiagnosticParse.h"
 #include "clang/Basic/DiagnosticSema.h"
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index eae41b5505a4..491c9d895413 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4591,6 +4591,12 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "unsigned int(bool)";
 }
 
+def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_clamp"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLCreateHandle : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_create_handle"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 7785fb430c06..7d53c751c13a 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -12,6 +12,7 @@ clang_diag_gen(Common)
 clang_diag_gen(CrossTU)
 clang_diag_gen(Driver)
 clang_diag_gen(Frontend)
+clang_diag_gen(InstallAPI)
 clang_diag_gen(Lex)
 clang_diag_gen(Parse)
 clang_diag_gen(Refactoring)
diff --git a/clang/include/clang/Basic/Diagnostic.td b/clang/include/clang/Basic/Diagnostic.td
index 8d66e265fbae..0b8b3af939ba 100644
--- a/clang/include/clang/Basic/Diagnostic.td
+++ b/clang/include/clang/Basic/Diagnostic.td
@@ -162,6 +162,7 @@ include "DiagnosticCommonKinds.td"
 include "DiagnosticCrossTUKinds.td"
 include "DiagnosticDriverKinds.td"
 include "DiagnosticFrontendKinds.td"
+include "DiagnosticInstallAPIKinds.td"
 include "DiagnosticLexKinds.td"
 include "DiagnosticParseKinds.td"
 include "DiagnosticRefactoringKinds.td"
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index 0cdda42793f6..95b502b1e97a 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -42,6 +42,7 @@ namespace clang {
       DIAG_SIZE_SEMA          = 4500,
       DIAG_SIZE_ANALYSIS      =  100,
       DIAG_SIZE_REFACTORING   = 1000,
+      DIAG_SIZE_INSTALLAPI    =  100,
     };
     // Start position for diagnostics.
     enum {
@@ -57,7 +58,8 @@ namespace clang {
       DIAG_START_SEMA          = DIAG_START_CROSSTU       + static_cast<int>(DIAG_SIZE_CROSSTU),
       DIAG_START_ANALYSIS      = DIAG_START_SEMA          + static_cast<int>(DIAG_SIZE_SEMA),
       DIAG_START_REFACTORING   = DIAG_START_ANALYSIS      + static_cast<int>(DIAG_SIZE_ANALYSIS),
-      DIAG_UPPER_LIMIT         = DIAG_START_REFACTORING   + static_cast<int>(DIAG_SIZE_REFACTORING)
+      DIAG_START_INSTALLAPI    = DIAG_START_REFACTORING   + static_cast<int>(DIAG_SIZE_REFACTORING),
+      DIAG_UPPER_LIMIT         = DIAG_START_INSTALLAPI    + static_cast<int>(DIAG_SIZE_INSTALLAPI)
     };
 
     class CustomDiagInfo;
diff --git a/clang/include/clang/Basic/DiagnosticInstallAPI.h b/clang/include/clang/Basic/DiagnosticInstallAPI.h
new file mode 100644
index 000000000000..a76f6e087a2b
--- /dev/null
+++ b/clang/include/clang/Basic/DiagnosticInstallAPI.h
@@ -0,0 +1,26 @@
+//===--- DiagnosticInstallAPI.h - Diagnostics for InstallAPI-----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_DIAGNOSTICINSTALLAPI_H
+#define LLVM_CLANG_BASIC_DIAGNOSTICINSTALLAPI_H
+
+#include "clang/Basic/Diagnostic.h"
+namespace clang {
+namespace diag {
+enum {
+#define DIAG(ENUM, FLAGS, DEFAULT_MAPPING, DESC, GROUP, SFINAE, NOWERROR,      \
+             SHOWINSYSHEADER, SHOWINSYSMACRO, DEFERRABLE, CATEGORY)            \
+  ENUM,
+#define INSTALLAPISTART
+#include "clang/Basic/DiagnosticInstallAPIKinds.inc"
+#undef DIAG
+  NUM_BUILTIN_INSTALLAPI_DIAGNOSTICS
+};
+} // namespace diag
+} // namespace clang
+#endif // LLVM_CLANG_BASIC_DIAGNOSTICINSTALLAPI_H
diff --git a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td
new file mode 100644
index 000000000000..31be4f09cf3a
--- /dev/null
+++ b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td
@@ -0,0 +1,20 @@
+//==--- DiagnosticInstallAPIKinds.td - installapi diagnostics -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// InstallAPI Diagnostics
+//===----------------------------------------------------------------------===//
+
+let Component = "InstallAPI" in {
+let CategoryName = "Command line" in {
+def err_cannot_write_file : Error<"cannot write file '%0': %1">;
+def err_no_install_name : Error<"no install name specified: add -install_name <path>">;
+def err_no_output_file: Error<"no output file specified">;
+} // end of command line category.
+
+} // end of InstallAPI component
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index c4cab360bab3..8e27f75012ee 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -28,8 +28,8 @@
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/StringSaver.h"
 
-#include <list>
 #include <map>
+#include <set>
 #include <string>
 #include <vector>
 
@@ -839,6 +839,13 @@ llvm::Error expandResponseFiles(SmallVectorImpl<const char *> &Args,
                                 bool ClangCLMode, llvm::BumpPtrAllocator &Alloc,
                                 llvm::vfs::FileSystem *FS = nullptr);
 
+/// Apply a space separated list of edits to the input argument lists.
+/// See applyOneOverrideOption.
+void applyOverrideOptions(SmallVectorImpl<const char *> &Args,
+                          const char *OverrideOpts,
+                          llvm::StringSet<> &SavedStrings,
+                          raw_ostream *OS = nullptr);
+
 } // end namespace driver
 } // end namespace clang
 
diff --git a/clang/include/clang/Format/.clang-format b/clang/include/clang/Format/.clang-format
index f95602cab0f7..d7331b3c8cf0 100644
--- a/clang/include/clang/Format/.clang-format
+++ b/clang/include/clang/Format/.clang-format
@@ -1,6 +1 @@
-BasedOnStyle: LLVM
-InsertBraces: true
-InsertNewlineAtEOF: true
-LineEnding: LF
-RemoveBracesLLVM: true
-RemoveParentheses: ReturnStatement
+BasedOnStyle: clang-format
diff --git a/clang/include/clang/InstallAPI/Context.h b/clang/include/clang/InstallAPI/Context.h
index bdb576d7d85f..54e517544b8e 100644
--- a/clang/include/clang/InstallAPI/Context.h
+++ b/clang/include/clang/InstallAPI/Context.h
@@ -11,6 +11,7 @@
 
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/InstallAPI/DylibVerifier.h"
 #include "clang/InstallAPI/HeaderFile.h"
 #include "clang/InstallAPI/MachO.h"
 #include "llvm/ADT/DenseMap.h"
@@ -45,6 +46,9 @@ struct InstallAPIContext {
   /// DiagnosticsEngine for all error reporting.
   DiagnosticsEngine *Diags = nullptr;
 
+  /// Verifier when binary dylib is passed as input.
+  std::unique_ptr<DylibVerifier> Verifier = nullptr;
+
   /// File Path of output location.
   llvm::StringRef OutputLoc{};
 
diff --git a/clang/include/clang/InstallAPI/DylibVerifier.h b/clang/include/clang/InstallAPI/DylibVerifier.h
new file mode 100644
index 000000000000..72c4743fdf65
--- /dev/null
+++ b/clang/include/clang/InstallAPI/DylibVerifier.h
@@ -0,0 +1,104 @@
+//===- InstallAPI/DylibVerifier.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_DYLIBVERIFIER_H
+#define LLVM_CLANG_INSTALLAPI_DYLIBVERIFIER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/InstallAPI/MachO.h"
+
+namespace clang {
+namespace installapi {
+struct FrontendAttrs;
+
+/// A list of InstallAPI verification modes.
+enum class VerificationMode {
+  Invalid,
+  ErrorsOnly,
+  ErrorsAndWarnings,
+  Pedantic,
+};
+
+/// Service responsible to tracking state of verification across the
+/// lifetime of InstallAPI.
+/// As declarations are collected during AST traversal, they are
+/// compared as symbols against what is available in the binary dylib.
+class DylibVerifier {
+private:
+  struct SymbolContext;
+
+public:
+  enum class Result { NoVerify, Ignore, Valid, Invalid };
+  struct VerifierContext {
+    // Current target being verified against the AST.
+    llvm::MachO::Target Target;
+
+    // Query state of verification after AST has been traversed.
+    Result FrontendState;
+
+    // First error for AST traversal, which is tied to the target triple.
+    bool DiscoveredFirstError;
+  };
+
+  DylibVerifier() = default;
+
+  DylibVerifier(llvm::MachO::Records &&Dylib, DiagnosticsEngine *Diag,
+                VerificationMode Mode, bool Demangle)
+      : Dylib(std::move(Dylib)), Diag(Diag), Mode(Mode), Demangle(Demangle),
+        Exports(std::make_unique<SymbolSet>()) {}
+
+  Result verify(GlobalRecord *R, const FrontendAttrs *FA);
+  Result verify(ObjCInterfaceRecord *R, const FrontendAttrs *FA);
+  Result verify(ObjCIVarRecord *R, const FrontendAttrs *FA,
+                const StringRef SuperClass);
+
+  /// Initialize target for verification.
+  void setTarget(const Target &T);
+
+  /// Release ownership over exports.
+  std::unique_ptr<SymbolSet> getExports() { return std::move(Exports); }
+
+  /// Get result of verification.
+  Result getState() const { return Ctx.FrontendState; }
+
+private:
+  /// Determine whether to compare declaration to symbol in binary.
+  bool canVerify();
+
+  /// Shared implementation for verifying exported symbols.
+  Result verifyImpl(Record *R, SymbolContext &SymCtx);
+
+  /// Update result state on each call to `verify`.
+  void updateState(Result State);
+
+  /// Add verified exported symbol.
+  void addSymbol(const Record *R, SymbolContext &SymCtx,
+                 TargetList &&Targets = {});
+
+  // Symbols in dylib.
+  llvm::MachO::Records Dylib;
+
+  // Engine for reporting violations.
+  [[maybe_unused]] DiagnosticsEngine *Diag = nullptr;
+
+  // Controls what class of violations to report.
+  [[maybe_unused]] VerificationMode Mode = VerificationMode::Invalid;
+
+  // Attempt to demangle when reporting violations.
+  bool Demangle = false;
+
+  // Valid symbols in final text file.
+  std::unique_ptr<SymbolSet> Exports = std::make_unique<SymbolSet>();
+
+  // Track current state of verification while traversing AST.
+  VerifierContext Ctx;
+};
+
+} // namespace installapi
+} // namespace clang
+#endif // LLVM_CLANG_INSTALLAPI_DYLIBVERIFIER_H
diff --git a/clang/include/clang/InstallAPI/Frontend.h b/clang/include/clang/InstallAPI/Frontend.h
index 873cb50d60a5..660fc8cd69a5 100644
--- a/clang/include/clang/InstallAPI/Frontend.h
+++ b/clang/include/clang/InstallAPI/Frontend.h
@@ -14,7 +14,6 @@
 #define LLVM_CLANG_INSTALLAPI_FRONTEND_H
 
 #include "clang/AST/ASTConsumer.h"
-#include "clang/AST/Availability.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/InstallAPI/Context.h"
diff --git a/clang/include/clang/InstallAPI/FrontendRecords.h b/clang/include/clang/InstallAPI/FrontendRecords.h
index 1f5bc37798be..59271e81e230 100644
--- a/clang/include/clang/InstallAPI/FrontendRecords.h
+++ b/clang/include/clang/InstallAPI/FrontendRecords.h
@@ -43,13 +43,13 @@ public:
   /// \param Flags The flags that describe attributes of the symbol.
   /// \param Inlined Whether declaration is inlined, only applicable to
   /// functions.
-  /// \return The non-owning pointer to added record in slice.
-  GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage,
-                          GlobalRecord::Kind GV,
-                          const clang::AvailabilityInfo Avail, const Decl *D,
-                          const HeaderType Access,
-                          SymbolFlags Flags = SymbolFlags::None,
-                          bool Inlined = false);
+  /// \return The non-owning pointer to added record in slice with it's frontend
+  /// attributes.
+  std::pair<GlobalRecord *, FrontendAttrs *>
+  addGlobal(StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV,
+            const clang::AvailabilityInfo Avail, const Decl *D,
+            const HeaderType Access, SymbolFlags Flags = SymbolFlags::None,
+            bool Inlined = false);
 
   /// Add ObjC Class record with attributes from AST.
   ///
@@ -60,11 +60,12 @@ public:
   /// \param D The pointer to the declaration from traversing AST.
   /// \param Access The intended access level of symbol.
   /// \param IsEHType Whether declaration has an exception attribute.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCInterfaceRecord *addObjCInterface(StringRef Name, RecordLinkage Linkage,
-                                        const clang::AvailabilityInfo Avail,
-                                        const Decl *D, HeaderType Access,
-                                        bool IsEHType);
+  /// \return The non-owning pointer to added record in slice with it's frontend
+  /// attributes.
+  std::pair<ObjCInterfaceRecord *, FrontendAttrs *>
+  addObjCInterface(StringRef Name, RecordLinkage Linkage,
+                   const clang::AvailabilityInfo Avail, const Decl *D,
+                   HeaderType Access, bool IsEHType);
 
   /// Add ObjC Category record with attributes from AST.
   ///
@@ -75,11 +76,12 @@ public:
   /// to the active target triple.
   /// \param D The pointer to the declaration from traversing AST.
   /// \param Access The intended access level of symbol.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCCategoryRecord *addObjCCategory(StringRef ClassToExtend,
-                                      StringRef CategoryName,
-                                      const clang::AvailabilityInfo Avail,
-                                      const Decl *D, HeaderType Access);
+  /// \return The non-owning pointer to added record in slice with it's frontend
+  /// attributes.
+  std::pair<ObjCCategoryRecord *, FrontendAttrs *>
+  addObjCCategory(StringRef ClassToExtend, StringRef CategoryName,
+                  const clang::AvailabilityInfo Avail, const Decl *D,
+                  HeaderType Access);
 
   /// Add ObjC IVar record with attributes from AST.
   ///
@@ -91,12 +93,13 @@ public:
   /// \param D The pointer to the declaration from traversing AST.
   /// \param Access The intended access level of symbol.
   /// \param AC The access control tied to the ivar declaration.
-  /// \return The non-owning pointer to added record in slice.
-  ObjCIVarRecord *addObjCIVar(ObjCContainerRecord *Container,
-                              StringRef IvarName, RecordLinkage Linkage,
-                              const clang::AvailabilityInfo Avail,
-                              const Decl *D, HeaderType Access,
-                              const clang::ObjCIvarDecl::AccessControl AC);
+  /// \return The non-owning pointer to added record in slice with it's frontend
+  /// attributes.
+  std::pair<ObjCIVarRecord *, FrontendAttrs *>
+  addObjCIVar(ObjCContainerRecord *Container, StringRef IvarName,
+              RecordLinkage Linkage, const clang::AvailabilityInfo Avail,
+              const Decl *D, HeaderType Access,
+              const clang::ObjCIvarDecl::AccessControl AC);
 
 private:
   /// Mapping of records stored in slice to their frontend attributes.
diff --git a/clang/include/clang/InstallAPI/InstallAPIDiagnostic.h b/clang/include/clang/InstallAPI/InstallAPIDiagnostic.h
new file mode 100644
index 000000000000..547fb0bcf9a8
--- /dev/null
+++ b/clang/include/clang/InstallAPI/InstallAPIDiagnostic.h
@@ -0,0 +1,14 @@
+//===--- InstallAPIDiagnostic.h - Diagnostics for InstallAPI ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INSTALLAPI_INSTALLAPIDIAGNOSTIC_H
+#define LLVM_CLANG_INSTALLAPI_INSTALLAPIDIAGNOSTIC_H
+
+#include "clang/Basic/DiagnosticInstallAPI.h"
+
+#endif
diff --git a/clang/include/clang/InstallAPI/MachO.h b/clang/include/clang/InstallAPI/MachO.h
index 55e5591389ce..6dee6f224203 100644
--- a/clang/include/clang/InstallAPI/MachO.h
+++ b/clang/include/clang/InstallAPI/MachO.h
@@ -18,6 +18,7 @@
 #include "llvm/TextAPI/PackedVersion.h"
 #include "llvm/TextAPI/Platform.h"
 #include "llvm/TextAPI/RecordVisitor.h"
+#include "llvm/TextAPI/Symbol.h"
 #include "llvm/TextAPI/Target.h"
 #include "llvm/TextAPI/TextAPIWriter.h"
 #include "llvm/TextAPI/Utils.h"
@@ -33,8 +34,10 @@ using ObjCIVarRecord = llvm::MachO::ObjCIVarRecord;
 using Records = llvm::MachO::Records;
 using BinaryAttrs = llvm::MachO::RecordsSlice::BinaryAttrs;
 using SymbolSet = llvm::MachO::SymbolSet;
+using SimpleSymbol = llvm::MachO::SimpleSymbol;
 using FileType = llvm::MachO::FileType;
 using PackedVersion = llvm::MachO::PackedVersion;
 using Target = llvm::MachO::Target;
+using TargetList = llvm::MachO::TargetList;
 
 #endif // LLVM_CLANG_INSTALLAPI_MACHO_H
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 1c3dcf63465c..645ec2f7563b 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -1567,10 +1567,9 @@ bool CXXRecordDecl::isGenericLambda() const {
 
 #ifndef NDEBUG
 static bool allLookupResultsAreTheSame(const DeclContext::lookup_result &R) {
-  for (auto *D : R)
-    if (!declaresSameEntity(D, R.front()))
-      return false;
-  return true;
+  return llvm::all_of(R, [&](NamedDecl *D) {
+    return D->isInvalidDecl() || declaresSameEntity(D, R.front());
+  });
 }
 #endif
 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index f5ad402e3bd7..131dace77f9c 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4604,8 +4604,17 @@ SourceRange DesignatedInitExpr::getDesignatorsSourceRange() const {
 SourceLocation DesignatedInitExpr::getBeginLoc() const {
   auto *DIE = const_cast<DesignatedInitExpr *>(this);
   Designator &First = *DIE->getDesignator(0);
-  if (First.isFieldDesignator())
-    return GNUSyntax ? First.getFieldLoc() : First.getDotLoc();
+  if (First.isFieldDesignator()) {
+    // Skip past implicit designators for anonymous structs/unions, since
+    // these do not have valid source locations.
+    for (unsigned int i = 0; i < DIE->size(); i++) {
+      Designator &Des = *DIE->getDesignator(i);
+      SourceLocation retval = GNUSyntax ? Des.getFieldLoc() : Des.getDotLoc();
+      if (!retval.isValid())
+        continue;
+      return retval;
+    }
+  }
   return First.getLBracketLoc();
 }
 
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7137efb7876d..fa9e8ecf6543 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -5594,6 +5594,9 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
         if (Assumption->isValueDependent())
           return ESR_Failed;
 
+        if (Assumption->HasSideEffects(Info.getCtx()))
+          continue;
+
         bool Value;
         if (!EvaluateAsBooleanCondition(Assumption, Value, Info))
           return ESR_Failed;
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index f07e430e279d..2e48ec2c5087 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1756,6 +1756,14 @@ bool ByteCodeExprGen<Emitter>::VisitTypeTraitExpr(const TypeTraitExpr *E) {
 }
 
 template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitArrayTypeTraitExpr(
+    const ArrayTypeTraitExpr *E) {
+  if (DiscardResult)
+    return true;
+  return this->emitConst(E->getValue(), E);
+}
+
+template <class Emitter>
 bool ByteCodeExprGen<Emitter>::VisitLambdaExpr(const LambdaExpr *E) {
   if (DiscardResult)
     return true;
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 969598c97805..db0d73ce23f7 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -99,6 +99,7 @@ public:
   bool VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *E);
   bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
   bool VisitTypeTraitExpr(const TypeTraitExpr *E);
+  bool VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E);
   bool VisitLambdaExpr(const LambdaExpr *E);
   bool VisitPredefinedExpr(const PredefinedExpr *E);
   bool VisitCXXThrowExpr(const CXXThrowExpr *E);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index b272a546573a..aa26bb7ed46f 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -390,6 +390,7 @@ public:
                           const FunctionDecl *D = nullptr,
                           bool ForceThisQuals = false,
                           bool MangleExceptionSpec = true);
+  void mangleSourceName(StringRef Name);
   void mangleNestedName(GlobalDecl GD);
 
 private:
@@ -408,7 +409,6 @@ private:
     mangleUnqualifiedName(GD, cast<NamedDecl>(GD.getDecl())->getDeclName());
   }
   void mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name);
-  void mangleSourceName(StringRef Name);
   void mangleOperatorName(OverloadedOperatorKind OO, SourceLocation Loc);
   void mangleCXXDtorType(CXXDtorType T);
   void mangleQualifiers(Qualifiers Quals, bool IsMember);
@@ -3920,7 +3920,8 @@ void MicrosoftMangleContextImpl::mangleThreadSafeStaticGuardVariable(
   msvc_hashing_ostream MHO(Out);
   MicrosoftCXXNameMangler Mangler(*this, MHO);
 
-  Mangler.getStream() << "?$TSS" << GuardNum << '@';
+  Mangler.getStream() << "?";
+  Mangler.mangleSourceName("$TSS" + llvm::utostr(GuardNum));
   Mangler.mangleNestedName(VD);
   Mangler.getStream() << "@4HA";
 }
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index b353a6627f29..4138b4ee4b0e 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -49,6 +49,7 @@ struct StaticDiagInfoDescriptionStringTable {
 #include "clang/Basic/DiagnosticSemaKinds.inc"
 #include "clang/Basic/DiagnosticAnalysisKinds.inc"
 #include "clang/Basic/DiagnosticRefactoringKinds.inc"
+#include "clang/Basic/DiagnosticInstallAPIKinds.inc"
   // clang-format on
 #undef DIAG
 };
@@ -70,7 +71,8 @@ const StaticDiagInfoDescriptionStringTable StaticDiagInfoDescriptions = {
 #include "clang/Basic/DiagnosticSemaKinds.inc"
 #include "clang/Basic/DiagnosticAnalysisKinds.inc"
 #include "clang/Basic/DiagnosticRefactoringKinds.inc"
-  // clang-format on
+#include "clang/Basic/DiagnosticInstallAPIKinds.inc"
+// clang-format on
 #undef DIAG
 };
 
@@ -95,7 +97,8 @@ const uint32_t StaticDiagInfoDescriptionOffsets[] = {
 #include "clang/Basic/DiagnosticSemaKinds.inc"
 #include "clang/Basic/DiagnosticAnalysisKinds.inc"
 #include "clang/Basic/DiagnosticRefactoringKinds.inc"
-  // clang-format on
+#include "clang/Basic/DiagnosticInstallAPIKinds.inc"
+// clang-format on
 #undef DIAG
 };
 
@@ -173,6 +176,7 @@ VALIDATE_DIAG_SIZE(CROSSTU)
 VALIDATE_DIAG_SIZE(SEMA)
 VALIDATE_DIAG_SIZE(ANALYSIS)
 VALIDATE_DIAG_SIZE(REFACTORING)
+VALIDATE_DIAG_SIZE(INSTALLAPI)
 #undef VALIDATE_DIAG_SIZE
 #undef STRINGIFY_NAME
 
@@ -204,6 +208,7 @@ const StaticDiagInfoRec StaticDiagInfo[] = {
 #include "clang/Basic/DiagnosticSemaKinds.inc"
 #include "clang/Basic/DiagnosticAnalysisKinds.inc"
 #include "clang/Basic/DiagnosticRefactoringKinds.inc"
+#include "clang/Basic/DiagnosticInstallAPIKinds.inc"
 // clang-format on
 #undef DIAG
 };
@@ -246,6 +251,7 @@ CATEGORY(CROSSTU, COMMENT)
 CATEGORY(SEMA, CROSSTU)
 CATEGORY(ANALYSIS, SEMA)
 CATEGORY(REFACTORING, ANALYSIS)
+CATEGORY(INSTALLAPI, REFACTORING)
 #undef CATEGORY
 
   // Avoid out of bounds reads.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e708bf3d6df1..e965df810add 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18048,6 +18048,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
         Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+    Value *OpX = EmitScalarExpr(E->getArg(0));
+    Value *OpMin = EmitScalarExpr(E->getArg(1));
+    Value *OpMax = EmitScalarExpr(E->getArg(2));
+
+    QualType Ty = E->getArg(0)->getType();
+    bool IsUnsigned = false;
+    if (auto *VecTy = Ty->getAs<VectorType>())
+      Ty = VecTy->getElementType();
+    IsUnsigned = Ty->isUnsignedIntegerType();
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/OpX->getType(),
+        IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
+        ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
+  }
   case Builtin::BI__builtin_hlsl_dot: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 190782a79a24..e3f5f5905a72 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -87,6 +87,7 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -6677,3 +6678,131 @@ llvm::Error driver::expandResponseFiles(SmallVectorImpl<const char *> &Args,
 
   return llvm::Error::success();
 }
+
+static const char *GetStableCStr(llvm::StringSet<> &SavedStrings, StringRef S) {
+  return SavedStrings.insert(S).first->getKeyData();
+}
+
+/// Apply a list of edits to the input argument lists.
+///
+/// The input string is a space separated list of edits to perform,
+/// they are applied in order to the input argument lists. Edits
+/// should be one of the following forms:
+///
+///  '#': Silence information about the changes to the command line arguments.
+///
+///  '^': Add FOO as a new argument at the beginning of the command line.
+///
+///  '+': Add FOO as a new argument at the end of the command line.
+///
+///  's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command
+///  line.
+///
+///  'xOPTION': Removes all instances of the literal argument OPTION.
+///
+///  'XOPTION': Removes all instances of the literal argument OPTION,
+///  and the following argument.
+///
+///  'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox'
+///  at the end of the command line.
+///
+/// \param OS - The stream to write edit information to.
+/// \param Args - The vector of command line arguments.
+/// \param Edit - The override command to perform.
+/// \param SavedStrings - Set to use for storing string representations.
+static void applyOneOverrideOption(raw_ostream &OS,
+                                   SmallVectorImpl<const char *> &Args,
+                                   StringRef Edit,
+                                   llvm::StringSet<> &SavedStrings) {
+  // This does not need to be efficient.
+
+  if (Edit[0] == '^') {
+    const char *Str = GetStableCStr(SavedStrings, Edit.substr(1));
+    OS << "### Adding argument " << Str << " at beginning\n";
+    Args.insert(Args.begin() + 1, Str);
+  } else if (Edit[0] == '+') {
+    const char *Str = GetStableCStr(SavedStrings, Edit.substr(1));
+    OS << "### Adding argument " << Str << " at end\n";
+    Args.push_back(Str);
+  } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.ends_with("/") &&
+             Edit.slice(2, Edit.size() - 1).contains('/')) {
+    StringRef MatchPattern = Edit.substr(2).split('/').first;
+    StringRef ReplPattern = Edit.substr(2).split('/').second;
+    ReplPattern = ReplPattern.slice(0, ReplPattern.size() - 1);
+
+    for (unsigned i = 1, e = Args.size(); i != e; ++i) {
+      // Ignore end-of-line response file markers
+      if (Args[i] == nullptr)
+        continue;
+      std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]);
+
+      if (Repl != Args[i]) {
+        OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n";
+        Args[i] = GetStableCStr(SavedStrings, Repl);
+      }
+    }
+  } else if (Edit[0] == 'x' || Edit[0] == 'X') {
+    auto Option = Edit.substr(1);
+    for (unsigned i = 1; i < Args.size();) {
+      if (Option == Args[i]) {
+        OS << "### Deleting argument " << Args[i] << '\n';
+        Args.erase(Args.begin() + i);
+        if (Edit[0] == 'X') {
+          if (i < Args.size()) {
+            OS << "### Deleting argument " << Args[i] << '\n';
+            Args.erase(Args.begin() + i);
+          } else
+            OS << "### Invalid X edit, end of command line!\n";
+        }
+      } else
+        ++i;
+    }
+  } else if (Edit[0] == 'O') {
+    for (unsigned i = 1; i < Args.size();) {
+      const char *A = Args[i];
+      // Ignore end-of-line response file markers
+      if (A == nullptr)
+        continue;
+      if (A[0] == '-' && A[1] == 'O' &&
+          (A[2] == '\0' || (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' ||
+                                             ('0' <= A[2] && A[2] <= '9'))))) {
+        OS << "### Deleting argument " << Args[i] << '\n';
+        Args.erase(Args.begin() + i);
+      } else
+        ++i;
+    }
+    OS << "### Adding argument " << Edit << " at end\n";
+    Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str()));
+  } else {
+    OS << "### Unrecognized edit: " << Edit << "\n";
+  }
+}
+
+void driver::applyOverrideOptions(SmallVectorImpl<const char *> &Args,
+                                  const char *OverrideStr,
+                                  llvm::StringSet<> &SavedStrings,
+                                  raw_ostream *OS) {
+  if (!OS)
+    OS = &llvm::nulls();
+
+  if (OverrideStr[0] == '#') {
+    ++OverrideStr;
+    OS = &llvm::nulls();
+  }
+
+  *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n";
+
+  // This does not need to be efficient.
+
+  const char *S = OverrideStr;
+  while (*S) {
+    const char *End = ::strchr(S, ' ');
+    if (!End)
+      End = S + strlen(S);
+    if (End != S)
+      applyOneOverrideOption(*OS, Args, std::string(S, End), SavedStrings);
+    S = End;
+    if (*S != '\0')
+      ++S;
+  }
+}
diff --git a/clang/lib/Format/.clang-format b/clang/lib/Format/.clang-format
index f95602cab0f7..d7331b3c8cf0 100644
--- a/clang/lib/Format/.clang-format
+++ b/clang/lib/Format/.clang-format
@@ -1,6 +1 @@
-BasedOnStyle: LLVM
-InsertBraces: true
-InsertNewlineAtEOF: true
-LineEnding: LF
-RemoveBracesLLVM: true
-RemoveParentheses: ReturnStatement
+BasedOnStyle: clang-format
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h
index e7c0680641e2..8b9360a3335e 100644
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -18,11 +18,8 @@
 #define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
 
 #include "Encoding.h"
-#include "TokenAnnotator.h"
 #include "WhitespaceManager.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/Support/Regex.h"
-#include <utility>
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/ContinuationIndenter.h b/clang/lib/Format/ContinuationIndenter.h
index 2598947bb624..18441e10a124 100644
--- a/clang/lib/Format/ContinuationIndenter.h
+++ b/clang/lib/Format/ContinuationIndenter.h
@@ -17,11 +17,6 @@
 
 #include "Encoding.h"
 #include "FormatToken.h"
-#include "clang/Format/Format.h"
-#include "llvm/Support/Regex.h"
-#include <map>
-#include <optional>
-#include <tuple>
 
 namespace clang {
 class SourceManager;
diff --git a/clang/lib/Format/Encoding.h b/clang/lib/Format/Encoding.h
index a0d664121b2b..12f9043bb95a 100644
--- a/clang/lib/Format/Encoding.h
+++ b/clang/lib/Format/Encoding.h
@@ -16,7 +16,6 @@
 #define LLVM_CLANG_LIB_FORMAT_ENCODING_H
 
 #include "clang/Basic/LLVM.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Unicode.h"
 
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index faf65c619b23..d5d115a3c8db 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -13,44 +13,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Format/Format.h"
-#include "AffectedRangeManager.h"
-#include "BreakableToken.h"
-#include "ContinuationIndenter.h"
 #include "DefinitionBlockSeparator.h"
-#include "FormatInternal.h"
-#include "FormatToken.h"
-#include "FormatTokenLexer.h"
 #include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "ObjCPropertyAttributeOrderFixer.h"
 #include "QualifierAlignmentFixer.h"
 #include "SortJavaScriptImports.h"
-#include "TokenAnalyzer.h"
-#include "TokenAnnotator.h"
 #include "UnwrappedLineFormatter.h"
-#include "UnwrappedLineParser.h"
 #include "UsingDeclarationsSorter.h"
-#include "WhitespaceManager.h"
-#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Lex/Lexer.h"
 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <algorithm>
-#include <memory>
-#include <mutex>
-#include <optional>
-#include <string>
-#include <unordered_map>
 
 #define DEBUG_TYPE "format-formatter"
 
diff --git a/clang/lib/Format/FormatInternal.h b/clang/lib/Format/FormatInternal.h
index 9043ce32e9e3..87974a853365 100644
--- a/clang/lib/Format/FormatInternal.h
+++ b/clang/lib/Format/FormatInternal.h
@@ -15,9 +15,6 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
 #define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H
 
-#include "BreakableToken.h"
-#include <utility>
-
 namespace clang {
 namespace format {
 namespace internal {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index ee96d072b120..c9022aba2871 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -19,8 +19,6 @@
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Format/Format.h"
 #include "clang/Lex/Lexer.h"
-#include <memory>
-#include <optional>
 #include <unordered_set>
 
 namespace clang {
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 65dd733bd533..277cc0a2dfde 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -17,14 +17,9 @@
 
 #include "Encoding.h"
 #include "FormatToken.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Format/Format.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/Support/Regex.h"
 
 #include <stack>
 
diff --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h
index 7819244eb7d1..cce19f527a92 100644
--- a/clang/lib/Format/FormatTokenSource.h
+++ b/clang/lib/Format/FormatTokenSource.h
@@ -15,10 +15,7 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
 
-#include "FormatToken.h"
 #include "UnwrappedLineParser.h"
-#include "llvm/ADT/DenseMap.h"
-#include <cstddef>
 
 #define DEBUG_TYPE "format-token-source"
 
diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index d2f7fe502364..fb12d22299de 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -39,15 +39,9 @@
 #define CLANG_LIB_FORMAT_MACROS_H
 
 #include <list>
-#include <map>
-#include <string>
-#include <vector>
 
 #include "FormatToken.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/SortJavaScriptImports.h b/clang/lib/Format/SortJavaScriptImports.h
index 7336db9537b0..b55b149aab4c 100644
--- a/clang/lib/Format/SortJavaScriptImports.h
+++ b/clang/lib/Format/SortJavaScriptImports.h
@@ -14,10 +14,7 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
 #define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
 
-#include "clang/Basic/LLVM.h"
 #include "clang/Format/Format.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h
index 4086dab1c94c..b7494c395c8a 100644
--- a/clang/lib/Format/TokenAnalyzer.h
+++ b/clang/lib/Format/TokenAnalyzer.h
@@ -17,19 +17,8 @@
 #define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
 
 #include "AffectedRangeManager.h"
-#include "Encoding.h"
-#include "FormatToken.h"
 #include "FormatTokenLexer.h"
 #include "TokenAnnotator.h"
-#include "UnwrappedLineParser.h"
-#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Format/Format.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include <memory>
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index e464c2b5731a..1342d37a1479 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3595,6 +3595,13 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
   if (!Current.Tok.getIdentifierInfo())
     return false;
 
+  const auto &Previous = *Current.Previous;
+
+  if (const auto *PrevPrev = Previous.Previous;
+      PrevPrev && PrevPrev->is(TT_ObjCDecl)) {
+    return false;
+  }
+
   auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
     for (; Next; Next = Next->Next) {
       if (Next->is(TT_OverloadedOperatorLParen))
@@ -3633,18 +3640,17 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
   // Find parentheses of parameter list.
   const FormatToken *Next = Current.Next;
   if (Current.is(tok::kw_operator)) {
-    const auto *Previous = Current.Previous;
-    if (Previous->Tok.getIdentifierInfo() &&
-        !Previous->isOneOf(tok::kw_return, tok::kw_co_return)) {
+    if (Previous.Tok.getIdentifierInfo() &&
+        !Previous.isOneOf(tok::kw_return, tok::kw_co_return)) {
       return true;
     }
-    if (Previous->is(tok::r_paren) && Previous->is(TT_TypeDeclarationParen)) {
-      assert(Previous->MatchingParen);
-      assert(Previous->MatchingParen->is(tok::l_paren));
-      assert(Previous->MatchingParen->is(TT_TypeDeclarationParen));
+    if (Previous.is(tok::r_paren) && Previous.is(TT_TypeDeclarationParen)) {
+      assert(Previous.MatchingParen);
+      assert(Previous.MatchingParen->is(tok::l_paren));
+      assert(Previous.MatchingParen->is(TT_TypeDeclarationParen));
       return true;
     }
-    if (!Previous->isPointerOrReference() && Previous->isNot(TT_TemplateCloser))
+    if (!Previous.isPointerOrReference() && Previous.isNot(TT_TemplateCloser))
       return false;
     Next = skipOperatorName(Next);
   } else {
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index b0931e85c16c..aa44b31e0fce 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -16,7 +16,6 @@
 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
 
 #include "UnwrappedLineParser.h"
-#include "clang/Format/Format.h"
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/UnwrappedLineFormatter.h b/clang/lib/Format/UnwrappedLineFormatter.h
index ee6d31de8c42..9b8acf427a2a 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.h
+++ b/clang/lib/Format/UnwrappedLineFormatter.h
@@ -16,8 +16,6 @@
 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H
 
 #include "ContinuationIndenter.h"
-#include "clang/Format/Format.h"
-#include <map>
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index a1f6ce05e45e..2b893f7abe40 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1224,7 +1224,6 @@ void UnwrappedLineParser::parsePPUnknown() {
 static bool tokenCanStartNewLine(const FormatToken &Tok) {
   // Semicolon can be a null-statement, l_square can be a start of a macro or
   // a C++11 attribute, but this doesn't seem to be common.
-  assert(Tok.isNot(TT_AttributeSquare));
   return !Tok.isOneOf(tok::semi, tok::l_brace,
                       // Tokens that can only be used as binary operators and a
                       // part of overloaded operator names.
@@ -3712,14 +3711,19 @@ bool UnwrappedLineParser::parseEnum() {
   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
     return false;
 
-  // Eat up enum class ...
-  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
-    nextToken();
+  if (IsCpp) {
+    // Eat up enum class ...
+    if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
+      nextToken();
+    while (FormatTok->is(tok::l_square))
+      if (!handleCppAttributes())
+        return false;
+  }
 
   while (FormatTok->Tok.getIdentifierInfo() ||
          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                             tok::greater, tok::comma, tok::question,
-                            tok::l_square, tok::r_square)) {
+                            tok::l_square)) {
     if (Style.isVerilog()) {
       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
       nextToken();
@@ -3732,7 +3736,6 @@ bool UnwrappedLineParser::parseEnum() {
     // We can have macros or attributes in between 'enum' and the enum name.
     if (FormatTok->is(tok::l_paren))
       parseParens();
-    assert(FormatTok->isNot(TT_AttributeSquare));
     if (FormatTok->is(tok::identifier)) {
       nextToken();
       // If there are two identifiers in a row, this is likely an elaborate
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 1403533a2d0e..da3188ed240c 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -15,17 +15,8 @@
 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
 
-#include "Encoding.h"
-#include "FormatToken.h"
 #include "Macros.h"
-#include "clang/Basic/IdentifierTable.h"
-#include "clang/Format/Format.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/Regex.h"
-#include <list>
 #include <stack>
-#include <vector>
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h
index 9942e0f35738..0ebc6cf8377c 100644
--- a/clang/lib/Format/WhitespaceManager.h
+++ b/clang/lib/Format/WhitespaceManager.h
@@ -17,11 +17,6 @@
 
 #include "TokenAnnotator.h"
 #include "clang/Basic/SourceManager.h"
-#include "clang/Format/Format.h"
-#include "llvm/ADT/SmallVector.h"
-#include <algorithm>
-#include <string>
-#include <tuple>
 
 namespace clang {
 namespace format {
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 718fb9a9b35c..5e703772b7ee 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -253,6 +253,116 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil)
 double4 ceil(double4);
 
 //===----------------------------------------------------------------------===//
+// clamp builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T clamp(T X, T Min, T Max)
+/// \brief Clamps the specified value \a X to the specified
+/// minimum ( \a Min) and maximum ( \a Max) range.
+/// \param X A value to clamp.
+/// \param Min The specified minimum range.
+/// \param Max The specified maximum range.
+///
+/// Returns The clamped value for the \a X parameter.
+/// For values of -INF or INF, clamp will behave as expected.
+/// However for values of NaN, the results are undefined.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half clamp(half, half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half2 clamp(half2, half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half3 clamp(half3, half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half4 clamp(half4, half4, half4);
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t clamp(int16_t, int16_t, int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t2 clamp(int16_t2, int16_t2, int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t3 clamp(int16_t3, int16_t3, int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t4 clamp(int16_t4, int16_t4, int16_t4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t clamp(uint16_t, uint16_t, uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int clamp(int, int, int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int2 clamp(int2, int2, int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int3 clamp(int3, int3, int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int4 clamp(int4, int4, int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint clamp(uint, uint, uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint2 clamp(uint2, uint2, uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint3 clamp(uint3, uint3, uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint4 clamp(uint4, uint4, uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t clamp(int64_t, int64_t, int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t2 clamp(int64_t2, int64_t2, int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t3 clamp(int64_t3, int64_t3, int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t4 clamp(int64_t4, int64_t4, int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t clamp(uint64_t, uint64_t, uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float clamp(float, float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float2 clamp(float2, float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float3 clamp(float3, float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float4 clamp(float4, float4, float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double clamp(double, double, double);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double2 clamp(double2, double2, double2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double3 clamp(double3, double3, double3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double4 clamp(double4, double4, double4);
+
+//===----------------------------------------------------------------------===//
 // cos builtins
 //===----------------------------------------------------------------------===//
 
diff --git a/clang/lib/InstallAPI/CMakeLists.txt b/clang/lib/InstallAPI/CMakeLists.txt
index dc90d6370de4..894db699578f 100644
--- a/clang/lib/InstallAPI/CMakeLists.txt
+++ b/clang/lib/InstallAPI/CMakeLists.txt
@@ -1,10 +1,12 @@
 set(LLVM_LINK_COMPONENTS
   Support
   TextAPI
+  Demangle
   Core
   )
 
 add_clang_library(clangInstallAPI
+  DylibVerifier.cpp
   FileList.cpp
   Frontend.cpp
   HeaderFile.cpp
diff --git a/clang/lib/InstallAPI/DylibVerifier.cpp b/clang/lib/InstallAPI/DylibVerifier.cpp
new file mode 100644
index 000000000000..b7dd85d63fa1
--- /dev/null
+++ b/clang/lib/InstallAPI/DylibVerifier.cpp
@@ -0,0 +1,212 @@
+#include "clang/InstallAPI/DylibVerifier.h"
+#include "clang/InstallAPI/FrontendRecords.h"
+#include "llvm/Demangle/Demangle.h"
+
+using namespace llvm::MachO;
+
+namespace clang {
+namespace installapi {
+
+/// Metadata stored about a mapping of a declaration to a symbol.
+struct DylibVerifier::SymbolContext {
+  // Name to use for printing in diagnostics.
+  std::string PrettyPrintName{""};
+
+  // Name to use for all querying and verification
+  // purposes.
+  std::string SymbolName{""};
+
+  // Kind to map symbol type against record.
+  EncodeKind Kind = EncodeKind::GlobalSymbol;
+
+  // Frontend Attributes tied to the AST.
+  const FrontendAttrs *FA = nullptr;
+
+  // The ObjCInterface symbol type, if applicable.
+  ObjCIFSymbolKind ObjCIFKind = ObjCIFSymbolKind::None;
+};
+
+static std::string
+getAnnotatedName(const Record *R, EncodeKind Kind, StringRef Name,
+                 bool ValidSourceLoc = true,
+                 ObjCIFSymbolKind ObjCIF = ObjCIFSymbolKind::None) {
+  assert(!Name.empty() && "Need symbol name for printing");
+
+  std::string Annotation;
+  if (R->isWeakDefined())
+    Annotation += "(weak-def) ";
+  if (R->isWeakReferenced())
+    Annotation += "(weak-ref) ";
+  if (R->isThreadLocalValue())
+    Annotation += "(tlv) ";
+
+  // Check if symbol represents only part of a @interface declaration.
+  const bool IsAnnotatedObjCClass = ((ObjCIF != ObjCIFSymbolKind::None) &&
+                                     (ObjCIF <= ObjCIFSymbolKind::EHType));
+
+  if (IsAnnotatedObjCClass) {
+    if (ObjCIF == ObjCIFSymbolKind::EHType)
+      Annotation += "Exception Type of ";
+    if (ObjCIF == ObjCIFSymbolKind::MetaClass)
+      Annotation += "Metaclass of ";
+    if (ObjCIF == ObjCIFSymbolKind::Class)
+      Annotation += "Class of ";
+  }
+
+  // Only print symbol type prefix or leading "_" if there is no source location
+  // tied to it. This can only ever happen when the location has to come from
+  // debug info.
+  if (ValidSourceLoc) {
+    if ((Kind == EncodeKind::GlobalSymbol) && Name.starts_with("_"))
+      return Annotation + Name.drop_front(1).str();
+    return Annotation + Name.str();
+  }
+
+  if (IsAnnotatedObjCClass)
+    return Annotation + Name.str();
+
+  switch (Kind) {
+  case EncodeKind::GlobalSymbol:
+    return Annotation + Name.str();
+  case EncodeKind::ObjectiveCInstanceVariable:
+    return Annotation + "(ObjC IVar) " + Name.str();
+  case EncodeKind::ObjectiveCClass:
+    return Annotation + "(ObjC Class) " + Name.str();
+  case EncodeKind::ObjectiveCClassEHType:
+    return Annotation + "(ObjC Class EH) " + Name.str();
+  }
+
+  llvm_unreachable("unexpected case for EncodeKind");
+}
+
+static std::string demangle(StringRef Name) {
+  // Itanium encoding requires 1 or 3 leading underscores, followed by 'Z'.
+  if (!(Name.starts_with("_Z") || Name.starts_with("___Z")))
+    return Name.str();
+  char *Result = llvm::itaniumDemangle(Name.data());
+  if (!Result)
+    return Name.str();
+
+  std::string Demangled(Result);
+  free(Result);
+  return Demangled;
+}
+
+static DylibVerifier::Result updateResult(const DylibVerifier::Result Prev,
+                                          const DylibVerifier::Result Curr) {
+  if (Prev == Curr)
+    return Prev;
+
+  // Never update from invalid or noverify state.
+  if ((Prev == DylibVerifier::Result::Invalid) ||
+      (Prev == DylibVerifier::Result::NoVerify))
+    return Prev;
+
+  // Don't let an ignored verification remove a valid one.
+  if (Prev == DylibVerifier::Result::Valid &&
+      Curr == DylibVerifier::Result::Ignore)
+    return Prev;
+
+  return Curr;
+}
+
+void DylibVerifier::updateState(Result State) {
+  Ctx.FrontendState = updateResult(Ctx.FrontendState, State);
+}
+
+void DylibVerifier::addSymbol(const Record *R, SymbolContext &SymCtx,
+                              TargetList &&Targets) {
+  if (Targets.empty())
+    Targets = {Ctx.Target};
+
+  Exports->addGlobal(SymCtx.Kind, SymCtx.SymbolName, R->getFlags(), Targets);
+}
+
+DylibVerifier::Result DylibVerifier::verifyImpl(Record *R,
+                                                SymbolContext &SymCtx) {
+  R->setVerify();
+  if (!canVerify()) {
+    // Accumulate symbols when not in verifying against dylib.
+    if (R->isExported() && !SymCtx.FA->Avail.isUnconditionallyUnavailable() &&
+        !SymCtx.FA->Avail.isObsoleted()) {
+      addSymbol(R, SymCtx);
+    }
+    return Ctx.FrontendState;
+  }
+  return Ctx.FrontendState;
+}
+
+bool DylibVerifier::canVerify() {
+  return Ctx.FrontendState != Result::NoVerify;
+}
+
+void DylibVerifier::setTarget(const Target &T) {
+  Ctx.Target = T;
+  Ctx.DiscoveredFirstError = false;
+  updateState(Dylib.empty() ? Result::NoVerify : Result::Ignore);
+}
+
+DylibVerifier::Result DylibVerifier::verify(ObjCIVarRecord *R,
+                                            const FrontendAttrs *FA,
+                                            const StringRef SuperClass) {
+  if (R->isVerified())
+    return getState();
+
+  std::string FullName =
+      ObjCIVarRecord::createScopedName(SuperClass, R->getName());
+  SymbolContext SymCtx{
+      getAnnotatedName(R, EncodeKind::ObjectiveCInstanceVariable,
+                       Demangle ? demangle(FullName) : FullName),
+      FullName, EncodeKind::ObjectiveCInstanceVariable, FA};
+  return verifyImpl(R, SymCtx);
+}
+
+static ObjCIFSymbolKind assignObjCIFSymbolKind(const ObjCInterfaceRecord *R) {
+  ObjCIFSymbolKind Result = ObjCIFSymbolKind::None;
+  if (R->getLinkageForSymbol(ObjCIFSymbolKind::Class) != RecordLinkage::Unknown)
+    Result |= ObjCIFSymbolKind::Class;
+  if (R->getLinkageForSymbol(ObjCIFSymbolKind::MetaClass) !=
+      RecordLinkage::Unknown)
+    Result |= ObjCIFSymbolKind::MetaClass;
+  if (R->getLinkageForSymbol(ObjCIFSymbolKind::EHType) !=
+      RecordLinkage::Unknown)
+    Result |= ObjCIFSymbolKind::EHType;
+  return Result;
+}
+
+DylibVerifier::Result DylibVerifier::verify(ObjCInterfaceRecord *R,
+                                            const FrontendAttrs *FA) {
+  if (R->isVerified())
+    return getState();
+  SymbolContext SymCtx;
+  SymCtx.SymbolName = R->getName();
+  SymCtx.ObjCIFKind = assignObjCIFSymbolKind(R);
+
+  std::string DisplayName =
+      Demangle ? demangle(SymCtx.SymbolName) : SymCtx.SymbolName;
+  SymCtx.Kind = R->hasExceptionAttribute() ? EncodeKind::ObjectiveCClassEHType
+                                           : EncodeKind::ObjectiveCClass;
+  SymCtx.PrettyPrintName = getAnnotatedName(R, SymCtx.Kind, DisplayName);
+  SymCtx.FA = FA;
+
+  return verifyImpl(R, SymCtx);
+}
+
+DylibVerifier::Result DylibVerifier::verify(GlobalRecord *R,
+                                            const FrontendAttrs *FA) {
+  if (R->isVerified())
+    return getState();
+
+  // Global classifications could be obfusciated with `asm`.
+  SimpleSymbol Sym = parseSymbol(R->getName());
+  SymbolContext SymCtx;
+  SymCtx.SymbolName = Sym.Name;
+  SymCtx.PrettyPrintName =
+      getAnnotatedName(R, Sym.Kind, Demangle ? demangle(Sym.Name) : Sym.Name);
+  SymCtx.Kind = Sym.Kind;
+  SymCtx.FA = FA;
+  return verifyImpl(R, SymCtx);
+}
+
+} // namespace installapi
+} // namespace clang
diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp
index 707aeb17dc89..12cd5fcbc22b 100644
--- a/clang/lib/InstallAPI/Frontend.cpp
+++ b/clang/lib/InstallAPI/Frontend.cpp
@@ -16,41 +16,47 @@ using namespace llvm;
 using namespace llvm::MachO;
 
 namespace clang::installapi {
-
-GlobalRecord *FrontendRecordsSlice::addGlobal(
+std::pair<GlobalRecord *, FrontendAttrs *> FrontendRecordsSlice::addGlobal(
     StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV,
     const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access,
     SymbolFlags Flags, bool Inlined) {
 
-  auto *GR =
+  GlobalRecord *GR =
       llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags, Inlined);
-  FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}});
-  return GR;
+  auto Result = FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}});
+  return {GR, &(Result.first->second)};
 }
 
-ObjCInterfaceRecord *FrontendRecordsSlice::addObjCInterface(
-    StringRef Name, RecordLinkage Linkage, const clang::AvailabilityInfo Avail,
-    const Decl *D, HeaderType Access, bool IsEHType) {
+std::pair<ObjCInterfaceRecord *, FrontendAttrs *>
+FrontendRecordsSlice::addObjCInterface(StringRef Name, RecordLinkage Linkage,
+                                       const clang::AvailabilityInfo Avail,
+                                       const Decl *D, HeaderType Access,
+                                       bool IsEHType) {
   ObjCIFSymbolKind SymType =
       ObjCIFSymbolKind::Class | ObjCIFSymbolKind::MetaClass;
   if (IsEHType)
     SymType |= ObjCIFSymbolKind::EHType;
-  auto *ObjCR =
+
+  ObjCInterfaceRecord *ObjCR =
       llvm::MachO::RecordsSlice::addObjCInterface(Name, Linkage, SymType);
-  FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
-  return ObjCR;
+  auto Result =
+      FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
+  return {ObjCR, &(Result.first->second)};
 }
 
-ObjCCategoryRecord *FrontendRecordsSlice::addObjCCategory(
-    StringRef ClassToExtend, StringRef CategoryName,
-    const clang::AvailabilityInfo Avail, const Decl *D, HeaderType Access) {
-  auto *ObjCR =
+std::pair<ObjCCategoryRecord *, FrontendAttrs *>
+FrontendRecordsSlice::addObjCCategory(StringRef ClassToExtend,
+                                      StringRef CategoryName,
+                                      const clang::AvailabilityInfo Avail,
+                                      const Decl *D, HeaderType Access) {
+  ObjCCategoryRecord *ObjCR =
       llvm::MachO::RecordsSlice::addObjCCategory(ClassToExtend, CategoryName);
-  FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
-  return ObjCR;
+  auto Result =
+      FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
+  return {ObjCR, &(Result.first->second)};
 }
 
-ObjCIVarRecord *FrontendRecordsSlice::addObjCIVar(
+std::pair<ObjCIVarRecord *, FrontendAttrs *> FrontendRecordsSlice::addObjCIVar(
     ObjCContainerRecord *Container, StringRef IvarName, RecordLinkage Linkage,
     const clang::AvailabilityInfo Avail, const Decl *D, HeaderType Access,
     const clang::ObjCIvarDecl::AccessControl AC) {
@@ -59,11 +65,12 @@ ObjCIVarRecord *FrontendRecordsSlice::addObjCIVar(
   if ((Linkage == RecordLinkage::Exported) &&
       ((AC == ObjCIvarDecl::Private) || (AC == ObjCIvarDecl::Package)))
     Linkage = RecordLinkage::Internal;
-  auto *ObjCR =
+  ObjCIVarRecord *ObjCR =
       llvm::MachO::RecordsSlice::addObjCIVar(Container, IvarName, Linkage);
-  FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
+  auto Result =
+      FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}});
 
-  return nullptr;
+  return {ObjCR, &(Result.first->second)};
 }
 
 std::optional<HeaderType>
diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp
index b4ed5974a057..187afe59309d 100644
--- a/clang/lib/InstallAPI/Visitor.cpp
+++ b/clang/lib/InstallAPI/Visitor.cpp
@@ -11,6 +11,7 @@
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/VTableBuilder.h"
 #include "clang/Basic/Linkage.h"
+#include "clang/InstallAPI/DylibVerifier.h"
 #include "clang/InstallAPI/FrontendRecords.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
@@ -156,7 +157,9 @@ void InstallAPIVisitor::recordObjCInstanceVariables(
     StringRef Name = IV->getName();
     const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(IV);
     auto AC = IV->getCanonicalAccessControl();
-    Ctx.Slice->addObjCIVar(Record, Name, Linkage, Avail, IV, *Access, AC);
+    auto [ObjCIVR, FA] =
+        Ctx.Slice->addObjCIVar(Record, Name, Linkage, Avail, IV, *Access, AC);
+    Ctx.Verifier->verify(ObjCIVR, FA, SuperClass);
   }
 }
 
@@ -178,15 +181,16 @@ bool InstallAPIVisitor::VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D) {
       (!D->getASTContext().getLangOpts().ObjCRuntime.isFragile() &&
        hasObjCExceptionAttribute(D));
 
-  ObjCInterfaceRecord *Class =
+  auto [Class, FA] =
       Ctx.Slice->addObjCInterface(Name, Linkage, Avail, D, *Access, IsEHType);
+  Ctx.Verifier->verify(Class, FA);
 
   // Get base class.
   StringRef SuperClassName;
   if (const auto *SuperClass = D->getSuperClass())
     SuperClassName = SuperClass->getObjCRuntimeNameAsString();
 
-  recordObjCInstanceVariables(D->getASTContext(), Class, SuperClassName,
+  recordObjCInstanceVariables(D->getASTContext(), Class, Class->getName(),
                               D->ivars());
   return true;
 }
@@ -201,8 +205,8 @@ bool InstallAPIVisitor::VisitObjCCategoryDecl(const ObjCCategoryDecl *D) {
   const ObjCInterfaceDecl *InterfaceD = D->getClassInterface();
   const StringRef InterfaceName = InterfaceD->getName();
 
-  ObjCCategoryRecord *Category = Ctx.Slice->addObjCCategory(
-      InterfaceName, CategoryName, Avail, D, *Access);
+  auto [Category, FA] = Ctx.Slice->addObjCCategory(InterfaceName, CategoryName,
+                                                   Avail, D, *Access);
   recordObjCInstanceVariables(D->getASTContext(), Category, InterfaceName,
                               D->ivars());
   return true;
@@ -236,8 +240,10 @@ bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) {
   const bool WeakDef = D->hasAttr<WeakAttr>();
   const bool ThreadLocal = D->getTLSKind() != VarDecl::TLS_None;
   const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(D);
-  Ctx.Slice->addGlobal(getMangledName(D), Linkage, GlobalRecord::Kind::Variable,
-                       Avail, D, *Access, getFlags(WeakDef, ThreadLocal));
+  auto [GR, FA] = Ctx.Slice->addGlobal(getMangledName(D), Linkage,
+                                       GlobalRecord::Kind::Variable, Avail, D,
+                                       *Access, getFlags(WeakDef, ThreadLocal));
+  Ctx.Verifier->verify(GR, FA);
   return true;
 }
 
@@ -287,8 +293,10 @@ bool InstallAPIVisitor::VisitFunctionDecl(const FunctionDecl *D) {
   const RecordLinkage Linkage = (Inlined || !isExported(D))
                                     ? RecordLinkage::Internal
                                     : RecordLinkage::Exported;
-  Ctx.Slice->addGlobal(Name, Linkage, GlobalRecord::Kind::Function, Avail, D,
-                       *Access, getFlags(WeakDef), Inlined);
+  auto [GR, FA] =
+      Ctx.Slice->addGlobal(Name, Linkage, GlobalRecord::Kind::Function, Avail,
+                           D, *Access, getFlags(WeakDef), Inlined);
+  Ctx.Verifier->verify(GR, FA);
   return true;
 }
 
@@ -478,9 +486,10 @@ void InstallAPIVisitor::emitVTableSymbols(const CXXRecordDecl *D,
         VTableLinkage == CXXLinkage::WeakODRLinkage) {
       const std::string Name = getMangledCXXVTableName(D);
       const bool WeakDef = VTableLinkage == CXXLinkage::WeakODRLinkage;
-      Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                           GlobalRecord::Kind::Variable, Avail, D, Access,
-                           getFlags(WeakDef));
+      auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                           GlobalRecord::Kind::Variable, Avail,
+                                           D, Access, getFlags(WeakDef));
+      Ctx.Verifier->verify(GR, FA);
       if (!D->getDescribedClassTemplate() && !D->isInvalidDecl()) {
         VTableContextBase *VTable = D->getASTContext().getVTableContext();
         auto AddThunk = [&](GlobalDecl GD) {
@@ -491,9 +500,10 @@ void InstallAPIVisitor::emitVTableSymbols(const CXXRecordDecl *D,
 
           for (const auto &Thunk : *Thunks) {
             const std::string Name = getMangledCXXThunk(GD, Thunk);
-            Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                                 GlobalRecord::Kind::Function, Avail,
-                                 GD.getDecl(), Access);
+            auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                                 GlobalRecord::Kind::Function,
+                                                 Avail, GD.getDecl(), Access);
+            Ctx.Verifier->verify(GR, FA);
           }
         };
 
@@ -519,12 +529,16 @@ void InstallAPIVisitor::emitVTableSymbols(const CXXRecordDecl *D,
 
   if (hasRTTI(D)) {
     std::string Name = getMangledCXXRTTI(D);
-    Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                         GlobalRecord::Kind::Variable, Avail, D, Access);
+    auto [GR, FA] =
+        Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                             GlobalRecord::Kind::Variable, Avail, D, Access);
+    Ctx.Verifier->verify(GR, FA);
 
     Name = getMangledCXXRTTIName(D);
-    Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                         GlobalRecord::Kind::Variable, Avail, D, Access);
+    auto [NamedGR, NamedFA] =
+        Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                             GlobalRecord::Kind::Variable, Avail, D, Access);
+    Ctx.Verifier->verify(NamedGR, NamedFA);
   }
 
   for (const auto &It : D->bases()) {
@@ -615,15 +629,17 @@ bool InstallAPIVisitor::VisitCXXRecordDecl(const CXXRecordDecl *D) {
         continue;
 
       std::string Name = getMangledCtorDtor(M, Ctor_Base);
-      Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                           GlobalRecord::Kind::Function, Avail, D, *Access,
-                           getFlags(WeakDef));
+      auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                           GlobalRecord::Kind::Function, Avail,
+                                           D, *Access, getFlags(WeakDef));
+      Ctx.Verifier->verify(GR, FA);
 
       if (!D->isAbstract()) {
         std::string Name = getMangledCtorDtor(M, Ctor_Complete);
-        Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                             GlobalRecord::Kind::Function, Avail, D, *Access,
-                             getFlags(WeakDef));
+        auto [GR, FA] = Ctx.Slice->addGlobal(
+            Name, RecordLinkage::Exported, GlobalRecord::Kind::Function, Avail,
+            D, *Access, getFlags(WeakDef));
+        Ctx.Verifier->verify(GR, FA);
       }
 
       continue;
@@ -635,20 +651,23 @@ bool InstallAPIVisitor::VisitCXXRecordDecl(const CXXRecordDecl *D) {
         continue;
 
       std::string Name = getMangledCtorDtor(M, Dtor_Base);
-      Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                           GlobalRecord::Kind::Function, Avail, D, *Access,
-                           getFlags(WeakDef));
+      auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                           GlobalRecord::Kind::Function, Avail,
+                                           D, *Access, getFlags(WeakDef));
+      Ctx.Verifier->verify(GR, FA);
 
       Name = getMangledCtorDtor(M, Dtor_Complete);
-      Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                           GlobalRecord::Kind::Function, Avail, D, *Access,
-                           getFlags(WeakDef));
+      auto [CompleteGR, CompleteFA] = Ctx.Slice->addGlobal(
+          Name, RecordLinkage::Exported, GlobalRecord::Kind::Function, Avail, D,
+          *Access, getFlags(WeakDef));
+      Ctx.Verifier->verify(CompleteGR, CompleteFA);
 
       if (Dtor->isVirtual()) {
         Name = getMangledCtorDtor(M, Dtor_Deleting);
-        Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                             GlobalRecord::Kind::Function, Avail, D, *Access,
-                             getFlags(WeakDef));
+        auto [VirtualGR, VirtualFA] = Ctx.Slice->addGlobal(
+            Name, RecordLinkage::Exported, GlobalRecord::Kind::Function, Avail,
+            D, *Access, getFlags(WeakDef));
+        Ctx.Verifier->verify(VirtualGR, VirtualFA);
       }
 
       continue;
@@ -661,9 +680,10 @@ bool InstallAPIVisitor::VisitCXXRecordDecl(const CXXRecordDecl *D) {
       continue;
 
     std::string Name = getMangledName(M);
-    Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                         GlobalRecord::Kind::Function, Avail, D, *Access,
-                         getFlags(WeakDef));
+    auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                         GlobalRecord::Kind::Function, Avail, D,
+                                         *Access, getFlags(WeakDef));
+    Ctx.Verifier->verify(GR, FA);
   }
 
   if (auto *Templ = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
@@ -694,9 +714,10 @@ bool InstallAPIVisitor::VisitCXXRecordDecl(const CXXRecordDecl *D) {
     const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(Var);
     const bool WeakDef = Var->hasAttr<WeakAttr>() || KeepInlineAsWeak;
 
-    Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
-                         GlobalRecord::Kind::Variable, Avail, D, *Access,
-                         getFlags(WeakDef));
+    auto [GR, FA] = Ctx.Slice->addGlobal(Name, RecordLinkage::Exported,
+                                         GlobalRecord::Kind::Variable, Avail, D,
+                                         *Access, getFlags(WeakDef));
+    Ctx.Verifier->verify(GR, FA);
   }
 
   return true;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e303a7c6511c..46b783862e4e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5499,6 +5499,17 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+    if (checkArgCount(*this, TheCall, 3))
+      return true;
+    if (CheckVectorElementCallArgs(this, TheCall))
+      return true;
+    if (SemaBuiltinElementwiseTernaryMath(
+            TheCall, /*CheckForFloatArgs*/
+            TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
+      return true;
+    break;
+  }
   case Builtin::BI__builtin_hlsl_dot: {
     if (checkArgCount(*this, TheCall, 2))
       return true;
@@ -5547,7 +5558,9 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     if (CheckVectorElementCallArgs(this, TheCall))
       return true;
-    if (SemaBuiltinElementwiseTernaryMath(TheCall, /*CheckForFloatArgs*/ false))
+    if (SemaBuiltinElementwiseTernaryMath(
+            TheCall, /*CheckForFloatArgs*/
+            TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
       return true;
   }
   }
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index a8e387e35fb4..1c546e9f5894 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -692,11 +692,15 @@ bool Sema::CheckFunctionConstraints(const FunctionDecl *FD,
   // A lambda conversion operator has the same constraints as the call operator
   // and constraints checking relies on whether we are in a lambda call operator
   // (and may refer to its parameters), so check the call operator instead.
+  // Note that the declarations outside of the lambda should also be
+  // considered. Turning on the 'ForOverloadResolution' flag results in the
+  // LocalInstantiationScope not looking into its parents, but we can still
+  // access Decls from the parents while building a lambda RAII scope later.
   if (const auto *MD = dyn_cast<CXXConversionDecl>(FD);
       MD && isLambdaConversionOperator(const_cast<CXXConversionDecl *>(MD)))
     return CheckFunctionConstraints(MD->getParent()->getLambdaCallOperator(),
                                     Satisfaction, UsageLoc,
-                                    ForOverloadResolution);
+                                    /*ShouldAddDeclsFromParentScope=*/true);
 
   DeclContext *CtxToSave = const_cast<FunctionDecl *>(FD);
 
diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp
index 6c8df99a1597..a09c6d3acca5 100644
--- a/clang/test/AST/Interp/builtin-functions.cpp
+++ b/clang/test/AST/Interp/builtin-functions.cpp
@@ -514,7 +514,9 @@ namespace bswap {
 #define CFSTR __builtin___CFStringMakeConstantString
 void test7(void) {
   const void *X;
+#if !defined(_AIX)
   X = CFSTR("\242"); // both-warning {{input conversion stopped}}
+#endif
   X = CFSTR("\0"); // no-warning
   X = CFSTR(242); // both-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}}
   X = CFSTR("foo", "bar"); // both-error {{too many arguments to function call}}
diff --git a/clang/test/AST/Interp/literals.cpp b/clang/test/AST/Interp/literals.cpp
index 0a9580b6f664..277438d2e631 100644
--- a/clang/test/AST/Interp/literals.cpp
+++ b/clang/test/AST/Interp/literals.cpp
@@ -910,6 +910,18 @@ namespace TypeTraits {
   struct U {};
   static_assert(S3<U>{}.foo(), "");
   static_assert(!S3<T>{}.foo(), "");
+
+  typedef int Int;
+  typedef Int IntAr[10];
+  typedef const IntAr ConstIntAr;
+  typedef ConstIntAr ConstIntArAr[4];
+
+  static_assert(__array_rank(IntAr) == 1, "");
+  static_assert(__array_rank(ConstIntArAr) == 2, "");
+
+  static_assert(__array_extent(IntAr, 0) == 10, "");
+  static_assert(__array_extent(ConstIntArAr, 0) == 4, "");
+  static_assert(__array_extent(ConstIntArAr, 1) == 10, "");
 }
 
 #if __cplusplus >= 201402L
diff --git a/clang/test/CodeGenCXX/mangle-ms-back-references.cpp b/clang/test/CodeGenCXX/mangle-ms-back-references.cpp
index cb95c100b3d2..b27a9c5acacb 100644
--- a/clang/test/CodeGenCXX/mangle-ms-back-references.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-back-references.cpp
@@ -83,3 +83,20 @@ class H;
 
 void ManyParams(T01 &, T02 &, T03 &, T04 &, T05 &, T06 &, T07 &, T08 &, T09 &, T10 &, H<T11> &, H<T11> &) {}
 // CHECK: "?ManyParams@@YAXAAVT01@@AAVT02@@AAVT03@@AAVT04@@AAVT05@@AAVT06@@AAVT07@@AAVT08@@AAVT09@@AAVT10@@AAV?$H@VT11@@@@AAV?$H@VT11@@@@@Z"
+
+namespace NS {
+// The name "TSS0" for the name of the class below has been specifically
+// chosen to ensure that back reference lookup does not match against the
+// implicitly generated "$TSS0" name of the thread safe static initialization
+// variable.
+struct __declspec(dllexport) TSS0 {
+  static TSS0& get();
+  __forceinline static TSS0& singleton() {
+    static TSS0& lsv = get();
+    return lsv;
+  }
+};
+}
+// CHECK: "?singleton@TSS0@NS@@SAAAU12@XZ"
+// CHECK: "?lsv@?1??singleton@TSS0@NS@@SAAAU23@XZ@4AAU23@A"
+// CHECK: "?$TSS0@?1??singleton@TSS0@NS@@SAAAU23@XZ@4HA"
diff --git a/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
new file mode 100644
index 000000000000..e3ef26429e7e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: builtin_test_clamp_int4
+// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// CHECK: ret <4 x i32> %dx.clamp
+int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p2);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
new file mode 100644
index 000000000000..029e48ffe258
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+#ifdef __HLSL_ENABLE_16_BIT
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16(
+int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16(
+uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16
+uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16
+uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16
+uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
+#endif
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.uclamp.i32(
+int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32
+uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32
+uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32
+uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.uclamp.i64(
+uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64
+uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64
+uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64
+uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.dx.clamp.f16(
+// NO_HALF: define noundef float @"?test_clamp_half
+// NO_HALF: call float @llvm.dx.clamp.f32(
+half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_clamp_half2
+// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
+half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_clamp_half3
+// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
+half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_clamp_half4
+// NO_HALF: call <4 x float> @llvm.dx.clamp.v4f32(
+half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef float @"?test_clamp_float
+// CHECK: call float @llvm.dx.clamp.f32(
+float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x float> @"?test_clamp_float2
+// CHECK: call <2 x float> @llvm.dx.clamp.v2f32
+float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x float> @"?test_clamp_float3
+// CHECK: call <3 x float> @llvm.dx.clamp.v3f32
+float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x float> @"?test_clamp_float4
+// CHECK: call <4 x float> @llvm.dx.clamp.v4f32
+float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef double @
+// CHECK: call double @llvm.dx.clamp.f64(
+double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x double> @
+// CHECK: call <2 x double> @llvm.dx.clamp.v2f64
+double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x double> @
+// CHECK: call <3 x double> @llvm.dx.clamp.v3f64
+double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x double> @
+// CHECK: call <4 x double> @llvm.dx.clamp.v4f64
+double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }
diff --git a/clang/test/InstallAPI/asm.test b/clang/test/InstallAPI/asm.test
new file mode 100644
index 000000000000..b6af7f643d72
--- /dev/null
+++ b/clang/test/InstallAPI/asm.test
@@ -0,0 +1,90 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json
+
+// RUN: clang-installapi -target arm64-apple-macos13.1 \
+// RUN: -I%t/usr/include \
+// RUN: -install_name @rpath/lib/libasm.dylib \
+// RUN: %t/inputs.json -o %t/output.tbd 2>&1 | FileCheck %s --allow-empty
+// RUN: llvm-readtapi -compare %t/output.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty
+
+// CHECK-NOT: error: 
+// CHECK-NOT: warning: 
+
+//--- usr/include/asm.h
+#ifndef ASM_H
+#define ASM_H
+
+extern int ivar __asm("_OBJC_IVAR_$_SomeClass._ivar1");
+extern int objcClass1 __asm("_OBJC_CLASS_$_SomeClass");
+extern int objcClass2 __asm("_OBJC_METACLASS_$_SomeClass");
+extern int objcClass3 __asm("_OBJC_EHTYPE_$_SomeClass");
+extern int objcClass4 __asm(".objc_class_name_SomeClass");
+
+__attribute__((visibility("hidden")))
+@interface NSString {
+}
+@end
+
+extern int ivarExtra __asm("_OBJC_IVAR_$_NSString._ivar1");
+#endif // ASM_H
+
+//--- inputs.json.in
+{
+  "headers": [ {
+    "path" : "DSTROOT/usr/include/asm.h",
+    "type" : "public"
+  }],
+  "version": "3"
+}
+
+//--- expected.tbd
+{
+  "main_library": {
+    "compatibility_versions": [
+      {
+        "version": "0"
+      }
+    ],
+    "current_versions": [
+      {
+        "version": "0"
+      }
+    ],
+    "exported_symbols": [
+      {
+        "data": {
+          "objc_class": [
+            "SomeClass"
+          ],
+          "objc_eh_type": [
+            "SomeClass"
+          ],
+          "objc_ivar": [
+            "NSString._ivar1",
+            "SomeClass._ivar1"
+          ]
+        }
+      }
+    ],
+    "flags": [
+      {
+        "attributes": [
+          "not_app_extension_safe"
+        ]
+      }
+    ],
+    "install_names": [
+      {
+        "name": "@rpath/lib/libasm.dylib"
+      }
+    ],
+    "target_info": [
+      {
+        "min_deployment": "13.1",
+        "target": "arm64-macos"
+      }
+    ]
+  },
+  "tapi_tbd_version": 5
+}
diff --git a/clang/test/InstallAPI/driver-invalid-options.test b/clang/test/InstallAPI/driver-invalid-options.test
index a2e008e1eb03..69f3b2d66ab8 100644
--- a/clang/test/InstallAPI/driver-invalid-options.test
+++ b/clang/test/InstallAPI/driver-invalid-options.test
@@ -1,4 +1,9 @@
 /// Check non-darwin triple is rejected.
-// RUN: not clang-installapi -target x86_64-unknown-unknown %s 2> %t 
+// RUN: not clang-installapi -target x86_64-unknown-unknown %s -o tmp.tbd 2> %t 
 // RUN: FileCheck --check-prefix INVALID_INSTALLAPI -input-file %t %s
 // INVALID_INSTALLAPI: error: unsupported option 'installapi' for target 'x86_64-unknown-unknown'
+
+/// Check that missing install_name is reported.
+// RUN: not clang-installapi -target x86_64-apple-ios-simulator  %s -o tmp.tbd 2> %t 
+// RUN: FileCheck --check-prefix INVALID_INSTALL_NAME -input-file %t %s
+// INVALID_INSTALL_NAME: error: no install name specified: add -install_name <path>
diff --git a/clang/test/InstallAPI/functions.test b/clang/test/InstallAPI/functions.test
index 527965303cb3..5b5fd1308842 100644
--- a/clang/test/InstallAPI/functions.test
+++ b/clang/test/InstallAPI/functions.test
@@ -4,7 +4,7 @@
 
 // RUN: clang-installapi -target arm64-apple-macos13.1 \
 // RUN: -I%t/usr/include -I%t/usr/local/include \
-// RUN: -install_name @rpath/lib/libfunctions.dylib \
+// RUN: -install_name @rpath/lib/libfunctions.dylib --filetype=tbd-v4 \
 // RUN: %t/inputs.json -o %t/outputs.tbd 2>&1 | FileCheck %s --allow-empty
 // RUN: llvm-readtapi -compare %t/outputs.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty
 
diff --git a/clang/test/SemaCXX/cxx23-assume.cpp b/clang/test/SemaCXX/cxx23-assume.cpp
index 2d7c9b174d90..478da092471a 100644
--- a/clang/test/SemaCXX/cxx23-assume.cpp
+++ b/clang/test/SemaCXX/cxx23-assume.cpp
@@ -126,3 +126,13 @@ static_assert(f5<D>() == 1); // expected-note 3 {{while checking constraint sati
 static_assert(f5<double>() == 2);
 static_assert(f5<E>() == 1); // expected-note {{while checking constraint satisfaction}} expected-note {{in instantiation of}}
 static_assert(f5<F>() == 2); // expected-note {{while checking constraint satisfaction}} expected-note {{in instantiation of}}
+
+// Do not validate assumptions whose evaluation would have side-effects.
+constexpr int foo() {
+  int a = 0;
+  [[assume(a++)]] [[assume(++a)]]; // expected-warning 2 {{has side effects that will be discarded}} ext-warning 2 {{C++23 extension}}
+  [[assume((a+=1))]]; // expected-warning {{has side effects that will be discarded}} ext-warning {{C++23 extension}}
+  return a;
+}
+
+static_assert(foo() == 0);
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 0516a5da31ae..389002ab0e34 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -std=c++11 -Wno-unused-value -fsyntax-only -verify=expected,expected-cxx14,cxx11 -fblocks %s
 // RUN: %clang_cc1 -std=c++14 -Wno-unused-value -fsyntax-only -verify -verify=expected-cxx14 -fblocks %s
 // RUN: %clang_cc1 -std=c++17 -Wno-unused-value -verify -ast-dump -fblocks %s | FileCheck %s
 
@@ -558,8 +559,8 @@ struct B {
   int x;
   A a = [&] { int y = x; };
   A b = [&] { [&] { [&] { int y = x; }; }; };
-  A d = [&](auto param) { int y = x; };
-  A e = [&](auto param) { [&] { [&](auto param2) { int y = x; }; }; };
+  A d = [&](auto param) { int y = x; }; // cxx11-error {{'auto' not allowed in lambda parameter}}
+  A e = [&](auto param) { [&] { [&](auto param2) { int y = x; }; }; }; // cxx11-error 2 {{'auto' not allowed in lambda parameter}}
 };
 
 B<int> b;
@@ -589,6 +590,7 @@ struct S1 {
 void foo1() {
   auto s0 = S1{[name=]() {}}; // expected-error 2 {{expected expression}}
   auto s1 = S1{[name=name]() {}}; // expected-error {{use of undeclared identifier 'name'; did you mean 'name1'?}}
+                                  // cxx11-warning@-1 {{initialized lambda captures are a C++14 extension}}
 }
 }
 
@@ -604,7 +606,7 @@ namespace PR25627_dont_odr_use_local_consts {
 
 namespace ConversionOperatorDoesNotHaveDeducedReturnType {
   auto x = [](int){};
-  auto y = [](auto &v) -> void { v.n = 0; };
+  auto y = [](auto &v) -> void { v.n = 0; }; // cxx11-error {{'auto' not allowed in lambda parameter}} cxx11-note {{candidate function not viable}} cxx11-note {{conversion candidate}}
   using T = decltype(x);
   using U = decltype(y);
   using ExpectedTypeT = void (*)(int);
@@ -624,22 +626,22 @@ namespace ConversionOperatorDoesNotHaveDeducedReturnType {
     template<typename T>
       friend constexpr U::operator ExpectedTypeU<T>() const noexcept;
 #else
-    friend auto T::operator()(int) const;
+    friend auto T::operator()(int) const; // cxx11-error {{'auto' return without trailing return type; deduced return types are a C++14 extension}}
     friend T::operator ExpectedTypeT() const;
 
     template<typename T>
-      friend void U::operator()(T&) const;
+      friend void U::operator()(T&) const; // cxx11-error {{friend declaration of 'operator()' does not match any declaration}}
     // FIXME: This should not match, as above.
     template<typename T>
-      friend U::operator ExpectedTypeU<T>() const;
+      friend U::operator ExpectedTypeU<T>() const; // cxx11-error {{friend declaration of 'operator void (*)(type-parameter-0-0 &)' does not match any declaration}}
 #endif
 
   private:
     int n;
   };
 
-  // Should be OK: lambda's call operator is a friend.
-  void use(X &x) { y(x); }
+  // Should be OK in C++14 and later: lambda's call operator is a friend.
+  void use(X &x) { y(x); } // cxx11-error {{no matching function for call to object}}
 
   // This used to crash in return type deduction for the conversion opreator.
   struct A { int n; void f() { +[](decltype(n)) {}; } };
@@ -733,6 +735,8 @@ void GH67492() {
   auto lambda = (test, []() noexcept(true) {});
 }
 
+// FIXME: This currently causes clang to crash in C++11 mode.
+#if __cplusplus >= 201402L
 namespace GH83267 {
 auto l = [](auto a) { return 1; };
 using type = decltype(l);
@@ -747,3 +751,4 @@ using t = decltype(ll);
 template auto t::operator()<int>(int a) const; // expected-note {{in instantiation}}
 
 }
+#endif
diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
new file mode 100644
index 000000000000..4c0e5315ce53
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+
+float2 test_no_second_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_clamp(p0);
+  // expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
+}
+
+float2 test_no_third_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_clamp(p0, p0);
+  // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_clamp(p0, p0, p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
+}
+
+float2 test_clamp_no_second_arg(float2 p0) {
+  return clamp(p0);
+  // expected-error@-1 {{no matching function for call to 'clamp'}}
+}
+
+float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) {
+  return clamp(p0, p0, p1);
+  // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}}
+}
+
+float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}
+}
+
+float test_clamp_scalar_mismatch(float p0, half p1) {
+  return clamp(p1, p0, p1);
+  // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {
+  return clamp(p1, p0, p1);
+  // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_builtin_clamp_float3_splat(float p0, float3 p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float4 test_builtin_clamp_float4_splat(float p0, float4 p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_clamp_float2_int_splat(float2 p0, int p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_clamp_float3_int_splat(float3 p0, int p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_builtin_clamp_int_vect_to_float_vec_promotion(int2 p0, float p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float test_builtin_clamp_bool_type_promotion(bool p0) {
+  return __builtin_hlsl_elementwise_clamp(p0, p0, p0);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion(float p0, bool p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+  // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
+  return __builtin_hlsl_elementwise_clamp(p1, p0, p1);
+  // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_clamp_int_to_float_promotion(float p0, int p1) {
+  return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+  // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
index 0b6843591455..97ce931bf1b5 100644
--- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
@@ -72,15 +72,15 @@ float2 test_builtin_mad_int_vect_to_float_vec_promotion(int2 p0, float p1) {
 
 float builtin_bool_to_float_type_promotion(float p0, bool p1) {
   return __builtin_hlsl_mad(p0, p0, p1);
-  // expected-error@-1 {{3rd argument must be a vector, integer or floating point type (was 'bool')}}
+  // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
 }
 
 float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
   return __builtin_hlsl_mad(p1, p0, p1);
-  // expected-error@-1 {{2nd argument must be a vector, integer or floating point type (was 'bool')}}
+  // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
 }
 
 float builtin_mad_int_to_float_promotion(float p0, int p1) {
   return __builtin_hlsl_mad(p0, p0, p1);
-  // expected-error@-1 {{arguments are of different types ('double' vs 'int')}}
+  // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
 }
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index bac209a28da9..b7ea0d003a52 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1085,3 +1085,32 @@ template void Struct<void>::bar<>();
 template int Struct<void>::field<1, 2>;
 
 }
+
+namespace GH64808 {
+
+template <class T> struct basic_sender {
+  T func;
+  basic_sender(T) : func(T()) {}
+};
+
+auto a = basic_sender{[](auto... __captures) {
+  return []() // #note-a-1
+    requires((__captures, ...), false) // #note-a-2
+  {};
+}()};
+
+auto b = basic_sender{[](auto... __captures) {
+  return []()
+    requires([](int, double) { return true; }(decltype(__captures)()...))
+  {};
+}(1, 2.33)};
+
+void foo() {
+  a.func();
+  // expected-error@-1{{no matching function for call}}
+  // expected-note@#note-a-1{{constraints not satisfied}}
+  // expected-note@#note-a-2{{evaluated to false}}
+  b.func();
+}
+
+} // namespace GH64808
diff --git a/clang/tools/clang-format/.clang-format b/clang/tools/clang-format/.clang-format
index f95602cab0f7..d7331b3c8cf0 100644
--- a/clang/tools/clang-format/.clang-format
+++ b/clang/tools/clang-format/.clang-format
@@ -1,6 +1 @@
-BasedOnStyle: LLVM
-InsertBraces: true
-InsertNewlineAtEOF: true
-LineEnding: LF
-RemoveBracesLLVM: true
-RemoveParentheses: ReturnStatement
+BasedOnStyle: clang-format
diff --git a/clang/tools/clang-installapi/CMakeLists.txt b/clang/tools/clang-installapi/CMakeLists.txt
index e05f4eac3ad1..b90ffc847b15 100644
--- a/clang/tools/clang-installapi/CMakeLists.txt
+++ b/clang/tools/clang-installapi/CMakeLists.txt
@@ -2,13 +2,20 @@ set(LLVM_LINK_COMPONENTS
   Support
   TargetParser
   TextAPI
+  TextAPIBinaryReader
   Option
   )
 
+set(LLVM_TARGET_DEFINITIONS InstallAPIOpts.td)
+tablegen(LLVM InstallAPIOpts.inc -gen-opt-parser-defs)
+add_public_tablegen_target(InstallAPIDriverOptions)
+
 add_clang_tool(clang-installapi
   ClangInstallAPI.cpp
   Options.cpp
 
+  DEPENDS
+  InstallAPIDriverOptions
   GENERATE_DRIVER
   )
 
@@ -22,3 +29,4 @@ clang_target_link_libraries(clang-installapi
   clangTooling
   clangSerialization
   )
+
diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp
index 15b0baee88bc..d758f7311790 100644
--- a/clang/tools/clang-installapi/ClangInstallAPI.cpp
+++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp
@@ -14,12 +14,12 @@
 #include "Options.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticFrontend.h"
-#include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/InstallAPI/Frontend.h"
 #include "clang/InstallAPI/FrontendRecords.h"
+#include "clang/InstallAPI/InstallAPIDiagnostic.h"
 #include "clang/InstallAPI/MachO.h"
 #include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -92,22 +92,8 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
   IntrusiveRefCntPtr<clang::FileManager> FM(
       new FileManager(clang::FileSystemOptions(), OverlayFileSystem));
 
-  // Set up driver to parse input arguments.
-  auto DriverArgs = llvm::ArrayRef(Args).slice(1);
-  clang::driver::Driver Driver(ProgName, llvm::sys::getDefaultTargetTriple(),
-                               *Diag, "clang installapi tool");
-  auto TargetAndMode =
-      clang::driver::ToolChain::getTargetAndModeFromProgramName(ProgName);
-  Driver.setTargetAndMode(TargetAndMode);
-  bool HasError = false;
-  llvm::opt::InputArgList ArgList =
-      Driver.ParseArgStrings(DriverArgs, /*UseDriverMode=*/true, HasError);
-  if (HasError)
-    return EXIT_FAILURE;
-  Driver.setCheckInputsExist(false);
-
-  // Capture InstallAPI specific options and diagnose any option errors.
-  Options Opts(*Diag, FM.get(), ArgList);
+  // Capture all options and diagnose any errors.
+  Options Opts(*Diag, FM.get(), Args, ProgName);
   if (Diag->hasErrorOccurred())
     return EXIT_FAILURE;
 
@@ -130,6 +116,7 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
     for (const HeaderType Type :
          {HeaderType::Public, HeaderType::Private, HeaderType::Project}) {
       Ctx.Slice = std::make_shared<FrontendRecordsSlice>(Trip);
+      Ctx.Verifier->setTarget(Targ);
       Ctx.Type = Type;
       if (!runFrontend(ProgName, Opts.DriverOpts.Verbose, Ctx,
                        InMemoryFileSystem.get(), Opts.getClangFrontendArgs()))
@@ -138,6 +125,9 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
     }
   }
 
+  if (Ctx.Verifier->getState() == DylibVerifier::Result::Invalid)
+    return EXIT_FAILURE;
+
   // After symbols have been collected, prepare to write output.
   auto Out = CI->createOutputFile(Ctx.OutputLoc, /*Binary=*/false,
                                   /*RemoveFileOnSignal=*/false,
@@ -147,13 +137,7 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
     return EXIT_FAILURE;
 
   // Assign attributes for serialization.
-  auto Symbols = std::make_unique<SymbolSet>();
-  for (const auto &FR : FrontendResults) {
-    SymbolConverter Converter(Symbols.get(), FR->getTarget());
-    FR->visit(Converter);
-  }
-
-  InterfaceFile IF(std::move(Symbols));
+  InterfaceFile IF(Ctx.Verifier->getExports());
   for (const auto &TargetInfo : Opts.DriverOpts.Targets) {
     IF.addTarget(TargetInfo.first);
     IF.setFromBinaryAttrs(Ctx.BA, TargetInfo.first);
@@ -161,7 +145,8 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
 
   // Write output file and perform CI cleanup.
   if (auto Err = TextAPIWriter::writeToStream(*Out, IF, Ctx.FT)) {
-    Diag->Report(diag::err_cannot_open_file) << Ctx.OutputLoc;
+    Diag->Report(diag::err_cannot_write_file)
+        << Ctx.OutputLoc << std::move(Err);
     CI->clearOutputFiles(/*EraseFiles=*/true);
     return EXIT_FAILURE;
   }
diff --git a/clang/tools/clang-installapi/InstallAPIOpts.td b/clang/tools/clang-installapi/InstallAPIOpts.td
new file mode 100644
index 000000000000..87f4c3327e84
--- /dev/null
+++ b/clang/tools/clang-installapi/InstallAPIOpts.td
@@ -0,0 +1,31 @@
+//===--- InstallAPIOpts.td ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the specific options for InstallAPI.
+//
+//===----------------------------------------------------------------------===//
+
+// Include the common option parsing interfaces.
+include "llvm/Option/OptParser.td"
+
+
+/////////
+// Options
+
+// TextAPI options.
+def filetype : Joined<["--"], "filetype=">,
+  HelpText<"Specify the output file type (tbd-v4 or tbd-v5)">;
+
+// Verification options.
+def verify_against : Separate<["-"], "verify-against">,
+  HelpText<"Verify the specified dynamic library/framework against the headers">;
+def verify_against_EQ : Joined<["--"], "verify-against=">, Alias<verify_against>;
+def verify_mode_EQ : Joined<["--"], "verify-mode=">,
+  HelpText<"Specify the severity and extend of the validation. Valid modes are ErrorsOnly, ErrorsAndWarnings, and Pedantic.">;
+def demangle : Flag<["--", "-"], "demangle">,
+  HelpText<"Demangle symbols when printing warnings and errors">;
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index 70cb80f0fdb3..e5ff8a1aaa4f 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -10,35 +10,85 @@
 #include "clang/Driver/Driver.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/InstallAPI/FileList.h"
+#include "clang/InstallAPI/InstallAPIDiagnostic.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Host.h"
+#include "llvm/TextAPI/DylibReader.h"
+#include "llvm/TextAPI/TextAPIWriter.h"
 
-using namespace clang::driver;
-using namespace clang::driver::options;
+using namespace llvm;
 using namespace llvm::opt;
 using namespace llvm::MachO;
 
+namespace drv = clang::driver::options;
+
 namespace clang {
 namespace installapi {
 
+/// Create prefix string literals used in InstallAPIOpts.td.
+#define PREFIX(NAME, VALUE)                                                    \
+  static constexpr llvm::StringLiteral NAME##_init[] = VALUE;                  \
+  static constexpr llvm::ArrayRef<llvm::StringLiteral> NAME(                   \
+      NAME##_init, std::size(NAME##_init) - 1);
+#include "InstallAPIOpts.inc"
+#undef PREFIX
+
+static constexpr const llvm::StringLiteral PrefixTable_init[] =
+#define PREFIX_UNION(VALUES) VALUES
+#include "InstallAPIOpts.inc"
+#undef PREFIX_UNION
+    ;
+static constexpr const ArrayRef<StringLiteral>
+    PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1);
+
+/// Create table mapping all options defined in InstallAPIOpts.td.
+static constexpr OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS,         \
+               VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES)                   \
+  {PREFIX, NAME,  HELPTEXT,   METAVAR,     OPT_##ID,    Option::KIND##Class,   \
+   PARAM,  FLAGS, VISIBILITY, OPT_##GROUP, OPT_##ALIAS, ALIASARGS,             \
+   VALUES},
+#include "InstallAPIOpts.inc"
+#undef OPTION
+};
+
+namespace {
+
+/// \brief Create OptTable class for parsing actual command line arguments.
+class DriverOptTable : public opt::PrecomputedOptTable {
+public:
+  DriverOptTable() : PrecomputedOptTable(InfoTable, PrefixTable) {}
+};
+
+} // end anonymous namespace.
+
+static llvm::opt::OptTable *createDriverOptTable() {
+  return new DriverOptTable();
+}
+
 bool Options::processDriverOptions(InputArgList &Args) {
   // Handle inputs.
-  llvm::append_range(DriverOpts.FileLists, Args.getAllArgValues(OPT_INPUT));
+  llvm::append_range(DriverOpts.FileLists,
+                     Args.getAllArgValues(drv::OPT_INPUT));
 
   // Handle output.
   SmallString<PATH_MAX> OutputPath;
-  if (auto *Arg = Args.getLastArg(OPT_o)) {
+  if (auto *Arg = Args.getLastArg(drv::OPT_o)) {
     OutputPath = Arg->getValue();
     if (OutputPath != "-")
       FM->makeAbsolutePath(OutputPath);
     DriverOpts.OutputPath = std::string(OutputPath);
   }
+  if (DriverOpts.OutputPath.empty()) {
+    Diags->Report(diag::err_no_output_file);
+    return false;
+  }
 
   // Do basic error checking first for mixing -target and -arch options.
-  auto *ArgArch = Args.getLastArgNoClaim(OPT_arch);
-  auto *ArgTarget = Args.getLastArgNoClaim(OPT_target);
+  auto *ArgArch = Args.getLastArgNoClaim(drv::OPT_arch);
+  auto *ArgTarget = Args.getLastArgNoClaim(drv::OPT_target);
   auto *ArgTargetVariant =
-      Args.getLastArgNoClaim(OPT_darwin_target_variant_triple);
+      Args.getLastArgNoClaim(drv::OPT_darwin_target_variant_triple);
   if (ArgArch && (ArgTarget || ArgTargetVariant)) {
     Diags->Report(clang::diag::err_drv_argument_not_allowed_with)
         << ArgArch->getAsString(Args)
@@ -46,7 +96,7 @@ bool Options::processDriverOptions(InputArgList &Args) {
     return false;
   }
 
-  auto *ArgMinTargetOS = Args.getLastArgNoClaim(OPT_mtargetos_EQ);
+  auto *ArgMinTargetOS = Args.getLastArgNoClaim(drv::OPT_mtargetos_EQ);
   if ((ArgTarget || ArgTargetVariant) && ArgMinTargetOS) {
     Diags->Report(clang::diag::err_drv_cannot_mix_options)
         << ArgTarget->getAsString(Args) << ArgMinTargetOS->getAsString(Args);
@@ -55,7 +105,7 @@ bool Options::processDriverOptions(InputArgList &Args) {
 
   // Capture target triples first.
   if (ArgTarget) {
-    for (const Arg *A : Args.filtered(OPT_target)) {
+    for (const Arg *A : Args.filtered(drv::OPT_target)) {
       A->claim();
       llvm::Triple TargetTriple(A->getValue());
       Target TAPITarget = Target(TargetTriple);
@@ -69,30 +119,32 @@ bool Options::processDriverOptions(InputArgList &Args) {
     }
   }
 
-  DriverOpts.Verbose = Args.hasArgNoClaim(OPT_v);
+  DriverOpts.Verbose = Args.hasArgNoClaim(drv::OPT_v);
 
   return true;
 }
 
 bool Options::processLinkerOptions(InputArgList &Args) {
-  // TODO: add error handling.
-
-  // Required arguments.
-  if (const Arg *A = Args.getLastArg(options::OPT_install__name))
+  // Handle required arguments.
+  if (const Arg *A = Args.getLastArg(drv::OPT_install__name))
     LinkerOpts.InstallName = A->getValue();
+  if (LinkerOpts.InstallName.empty()) {
+    Diags->Report(diag::err_no_install_name);
+    return false;
+  }
 
   // Defaulted or optional arguments.
-  if (auto *Arg = Args.getLastArg(OPT_current__version))
+  if (auto *Arg = Args.getLastArg(drv::OPT_current__version))
     LinkerOpts.CurrentVersion.parse64(Arg->getValue());
 
-  if (auto *Arg = Args.getLastArg(OPT_compatibility__version))
+  if (auto *Arg = Args.getLastArg(drv::OPT_compatibility__version))
     LinkerOpts.CompatVersion.parse64(Arg->getValue());
 
-  LinkerOpts.IsDylib = Args.hasArg(OPT_dynamiclib);
+  LinkerOpts.IsDylib = Args.hasArg(drv::OPT_dynamiclib);
 
-  LinkerOpts.AppExtensionSafe =
-      Args.hasFlag(OPT_fapplication_extension, OPT_fno_application_extension,
-                   /*Default=*/LinkerOpts.AppExtensionSafe);
+  LinkerOpts.AppExtensionSafe = Args.hasFlag(
+      drv::OPT_fapplication_extension, drv::OPT_fno_application_extension,
+      /*Default=*/LinkerOpts.AppExtensionSafe);
 
   if (::getenv("LD_NO_ENCRYPT") != nullptr)
     LinkerOpts.AppExtensionSafe = true;
@@ -105,7 +157,7 @@ bool Options::processLinkerOptions(InputArgList &Args) {
 bool Options::processFrontendOptions(InputArgList &Args) {
   // Do not claim any arguments, as they will be passed along for CC1
   // invocations.
-  if (auto *A = Args.getLastArgNoClaim(OPT_x)) {
+  if (auto *A = Args.getLastArgNoClaim(drv::OPT_x)) {
     FEOpts.LangMode = llvm::StringSwitch<clang::Language>(A->getValue())
                           .Case("c", clang::Language::C)
                           .Case("c++", clang::Language::CXX)
@@ -119,8 +171,8 @@ bool Options::processFrontendOptions(InputArgList &Args) {
       return false;
     }
   }
-  for (auto *A : Args.filtered(OPT_ObjC, OPT_ObjCXX)) {
-    if (A->getOption().matches(OPT_ObjC))
+  for (auto *A : Args.filtered(drv::OPT_ObjC, drv::OPT_ObjCXX)) {
+    if (A->getOption().matches(drv::OPT_ObjC))
       FEOpts.LangMode = clang::Language::ObjC;
     else
       FEOpts.LangMode = clang::Language::ObjCXX;
@@ -129,9 +181,77 @@ bool Options::processFrontendOptions(InputArgList &Args) {
   return true;
 }
 
+std::vector<const char *>
+Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) {
+  std::unique_ptr<llvm::opt::OptTable> Table;
+  Table.reset(createDriverOptTable());
+
+  unsigned MissingArgIndex, MissingArgCount;
+  auto ParsedArgs = Table->ParseArgs(Args.slice(1), MissingArgIndex,
+                                     MissingArgCount, Visibility());
+
+  // Capture InstallAPI only driver options.
+  DriverOpts.Demangle = ParsedArgs.hasArg(OPT_demangle);
+
+  if (auto *A = ParsedArgs.getLastArg(OPT_filetype)) {
+    DriverOpts.OutFT = TextAPIWriter::parseFileType(A->getValue());
+    if (DriverOpts.OutFT == FileType::Invalid) {
+      Diags->Report(clang::diag::err_drv_invalid_value)
+          << A->getAsString(ParsedArgs) << A->getValue();
+      return {};
+    }
+  }
+
+  if (const Arg *A = ParsedArgs.getLastArg(OPT_verify_mode_EQ)) {
+    DriverOpts.VerifyMode =
+        StringSwitch<VerificationMode>(A->getValue())
+            .Case("ErrorsOnly", VerificationMode::ErrorsOnly)
+            .Case("ErrorsAndWarnings", VerificationMode::ErrorsAndWarnings)
+            .Case("Pedantic", VerificationMode::Pedantic)
+            .Default(VerificationMode::Invalid);
+
+    if (DriverOpts.VerifyMode == VerificationMode::Invalid) {
+      Diags->Report(clang::diag::err_drv_invalid_value)
+          << A->getAsString(ParsedArgs) << A->getValue();
+      return {};
+    }
+  }
+
+  if (const Arg *A = ParsedArgs.getLastArg(OPT_verify_against))
+    DriverOpts.DylibToVerify = A->getValue();
+
+  /// Any unclaimed arguments should be forwarded to the clang driver.
+  std::vector<const char *> ClangDriverArgs(ParsedArgs.size());
+  for (const Arg *A : ParsedArgs) {
+    if (A->isClaimed())
+      continue;
+    llvm::copy(A->getValues(), std::back_inserter(ClangDriverArgs));
+  }
+  return ClangDriverArgs;
+}
+
 Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
-                 InputArgList &ArgList)
+                 ArrayRef<const char *> Args, const StringRef ProgName)
     : Diags(&Diag), FM(FM) {
+
+  // First process InstallAPI specific options.
+  auto DriverArgs = processAndFilterOutInstallAPIOptions(Args);
+  if (Diags->hasErrorOccurred())
+    return;
+
+  // Set up driver to parse remaining input arguments.
+  clang::driver::Driver Driver(ProgName, llvm::sys::getDefaultTargetTriple(),
+                               *Diags, "clang installapi tool");
+  auto TargetAndMode =
+      clang::driver::ToolChain::getTargetAndModeFromProgramName(ProgName);
+  Driver.setTargetAndMode(TargetAndMode);
+  bool HasError = false;
+  llvm::opt::InputArgList ArgList =
+      Driver.ParseArgStrings(DriverArgs, /*UseDriverMode=*/true, HasError);
+  if (HasError)
+    return;
+  Driver.setCheckInputsExist(false);
+
   if (!processDriverOptions(ArgList))
     return;
 
@@ -145,7 +265,6 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
   for (const Arg *A : ArgList) {
     if (A->isClaimed())
       continue;
-
     FrontendArgs.emplace_back(A->getSpelling());
     llvm::copy(A->getValues(), std::back_inserter(FrontendArgs));
   }
@@ -172,16 +291,40 @@ InstallAPIContext Options::createContext() {
   for (const std::string &ListPath : DriverOpts.FileLists) {
     auto Buffer = FM->getBufferForFile(ListPath);
     if (auto Err = Buffer.getError()) {
-      Diags->Report(diag::err_cannot_open_file) << ListPath;
+      Diags->Report(diag::err_cannot_open_file) << ListPath << Err.message();
       return Ctx;
     }
     if (auto Err = FileListReader::loadHeaders(std::move(Buffer.get()),
                                                Ctx.InputHeaders)) {
-      Diags->Report(diag::err_cannot_open_file) << ListPath;
+      Diags->Report(diag::err_cannot_open_file) << ListPath << std::move(Err);
       return Ctx;
     }
   }
 
+  // Parse binary dylib and initialize verifier.
+  if (DriverOpts.DylibToVerify.empty()) {
+    Ctx.Verifier = std::make_unique<DylibVerifier>();
+    return Ctx;
+  }
+
+  auto Buffer = FM->getBufferForFile(DriverOpts.DylibToVerify);
+  if (auto Err = Buffer.getError()) {
+    Diags->Report(diag::err_cannot_open_file)
+        << DriverOpts.DylibToVerify << Err.message();
+    return Ctx;
+  }
+
+  DylibReader::ParseOption PO;
+  PO.Undefineds = false;
+  Expected<Records> Slices =
+      DylibReader::readFile((*Buffer)->getMemBufferRef(), PO);
+  if (auto Err = Slices.takeError()) {
+    Diags->Report(diag::err_cannot_open_file) << DriverOpts.DylibToVerify;
+    return Ctx;
+  }
+
+  Ctx.Verifier = std::make_unique<DylibVerifier>(
+      std::move(*Slices), Diags, DriverOpts.VerifyMode, DriverOpts.Demangle);
   return Ctx;
 }
 
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index e218d57b3051..2beeafc86bb0 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -11,8 +11,10 @@
 
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Driver/Driver.h"
 #include "clang/Frontend/FrontendOptions.h"
 #include "clang/InstallAPI/Context.h"
+#include "clang/InstallAPI/DylibVerifier.h"
 #include "clang/InstallAPI/MachO.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
@@ -32,12 +34,21 @@ struct DriverOptions {
   /// \brief Mappings of target triples & tapi targets to build for.
   std::map<llvm::MachO::Target, llvm::Triple> Targets;
 
+  /// \brief Path to binary dylib for comparing.
+  std::string DylibToVerify;
+
   /// \brief Output path.
   std::string OutputPath;
 
   /// \brief File encoding to print.
   FileType OutFT = FileType::TBD_V5;
 
+  /// \brief Verification mode for comparing symbols.
+  VerificationMode VerifyMode = VerificationMode::Pedantic;
+
+  /// \brief Print demangled symbols when reporting errors.
+  bool Demangle = false;
+
   /// \brief Print verbose output.
   bool Verbose = false;
 };
@@ -69,6 +80,8 @@ private:
   bool processDriverOptions(llvm::opt::InputArgList &Args);
   bool processLinkerOptions(llvm::opt::InputArgList &Args);
   bool processFrontendOptions(llvm::opt::InputArgList &Args);
+  std::vector<const char *>
+  processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args);
 
 public:
   /// The various options grouped together.
@@ -83,7 +96,7 @@ public:
 
   /// \brief Constructor for options.
   Options(clang::DiagnosticsEngine &Diag, FileManager *FM,
-          llvm::opt::InputArgList &Args);
+          ArrayRef<const char *> Args, const StringRef ProgName);
 
   /// \brief Get CC1 arguments after extracting out the irrelevant
   /// ones.
@@ -95,6 +108,16 @@ private:
   std::vector<std::string> FrontendArgs;
 };
 
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS,         \
+               VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES)                   \
+  OPT_##ID,
+#include "InstallAPIOpts.inc"
+  LastOption
+#undef OPTION
+};
+
 } // namespace installapi
 } // namespace clang
 #endif
diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp
index 376025e3605b..83b5bbb71f52 100644
--- a/clang/tools/driver/driver.cpp
+++ b/clang/tools/driver/driver.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Option/Option.h"
@@ -41,7 +42,6 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Support/Regex.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/TargetSelect.h"
@@ -73,136 +73,8 @@ std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) {
   return llvm::sys::fs::getMainExecutable(Argv0, P);
 }
 
-static const char *GetStableCStr(std::set<std::string> &SavedStrings,
-                                 StringRef S) {
-  return SavedStrings.insert(std::string(S)).first->c_str();
-}
-
-/// ApplyOneQAOverride - Apply a list of edits to the input argument lists.
-///
-/// The input string is a space separated list of edits to perform,
-/// they are applied in order to the input argument lists. Edits
-/// should be one of the following forms:
-///
-///  '#': Silence information about the changes to the command line arguments.
-///
-///  '^': Add FOO as a new argument at the beginning of the command line.
-///
-///  '+': Add FOO as a new argument at the end of the command line.
-///
-///  's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command
-///  line.
-///
-///  'xOPTION': Removes all instances of the literal argument OPTION.
-///
-///  'XOPTION': Removes all instances of the literal argument OPTION,
-///  and the following argument.
-///
-///  'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox'
-///  at the end of the command line.
-///
-/// \param OS - The stream to write edit information to.
-/// \param Args - The vector of command line arguments.
-/// \param Edit - The override command to perform.
-/// \param SavedStrings - Set to use for storing string representations.
-static void ApplyOneQAOverride(raw_ostream &OS,
-                               SmallVectorImpl<const char*> &Args,
-                               StringRef Edit,
-                               std::set<std::string> &SavedStrings) {
-  // This does not need to be efficient.
-
-  if (Edit[0] == '^') {
-    const char *Str =
-      GetStableCStr(SavedStrings, Edit.substr(1));
-    OS << "### Adding argument " << Str << " at beginning\n";
-    Args.insert(Args.begin() + 1, Str);
-  } else if (Edit[0] == '+') {
-    const char *Str =
-      GetStableCStr(SavedStrings, Edit.substr(1));
-    OS << "### Adding argument " << Str << " at end\n";
-    Args.push_back(Str);
-  } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.ends_with("/") &&
-             Edit.slice(2, Edit.size() - 1).contains('/')) {
-    StringRef MatchPattern = Edit.substr(2).split('/').first;
-    StringRef ReplPattern = Edit.substr(2).split('/').second;
-    ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1);
-
-    for (unsigned i = 1, e = Args.size(); i != e; ++i) {
-      // Ignore end-of-line response file markers
-      if (Args[i] == nullptr)
-        continue;
-      std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]);
-
-      if (Repl != Args[i]) {
-        OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n";
-        Args[i] = GetStableCStr(SavedStrings, Repl);
-      }
-    }
-  } else if (Edit[0] == 'x' || Edit[0] == 'X') {
-    auto Option = Edit.substr(1);
-    for (unsigned i = 1; i < Args.size();) {
-      if (Option == Args[i]) {
-        OS << "### Deleting argument " << Args[i] << '\n';
-        Args.erase(Args.begin() + i);
-        if (Edit[0] == 'X') {
-          if (i < Args.size()) {
-            OS << "### Deleting argument " << Args[i] << '\n';
-            Args.erase(Args.begin() + i);
-          } else
-            OS << "### Invalid X edit, end of command line!\n";
-        }
-      } else
-        ++i;
-    }
-  } else if (Edit[0] == 'O') {
-    for (unsigned i = 1; i < Args.size();) {
-      const char *A = Args[i];
-      // Ignore end-of-line response file markers
-      if (A == nullptr)
-        continue;
-      if (A[0] == '-' && A[1] == 'O' &&
-          (A[2] == '\0' ||
-           (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' ||
-                             ('0' <= A[2] && A[2] <= '9'))))) {
-        OS << "### Deleting argument " << Args[i] << '\n';
-        Args.erase(Args.begin() + i);
-      } else
-        ++i;
-    }
-    OS << "### Adding argument " << Edit << " at end\n";
-    Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str()));
-  } else {
-    OS << "### Unrecognized edit: " << Edit << "\n";
-  }
-}
-
-/// ApplyQAOverride - Apply a space separated list of edits to the
-/// input argument lists. See ApplyOneQAOverride.
-static void ApplyQAOverride(SmallVectorImpl<const char*> &Args,
-                            const char *OverrideStr,
-                            std::set<std::string> &SavedStrings) {
-  raw_ostream *OS = &llvm::errs();
-
-  if (OverrideStr[0] == '#') {
-    ++OverrideStr;
-    OS = &llvm::nulls();
-  }
-
-  *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n";
-
-  // This does not need to be efficient.
-
-  const char *S = OverrideStr;
-  while (*S) {
-    const char *End = ::strchr(S, ' ');
-    if (!End)
-      End = S + strlen(S);
-    if (End != S)
-      ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings);
-    S = End;
-    if (*S != '\0')
-      ++S;
-  }
+static const char *GetStableCStr(llvm::StringSet<> &SavedStrings, StringRef S) {
+  return SavedStrings.insert(S).first->getKeyData();
 }
 
 extern int cc1_main(ArrayRef<const char *> Argv, const char *Argv0,
@@ -215,7 +87,7 @@ extern int cc1gen_reproducer_main(ArrayRef<const char *> Argv,
 
 static void insertTargetAndModeArgs(const ParsedClangName &NameParts,
                                     SmallVectorImpl<const char *> &ArgVector,
-                                    std::set<std::string> &SavedStrings) {
+                                    llvm::StringSet<> &SavedStrings) {
   // Put target and mode arguments at the start of argument list so that
   // arguments specified in command line could override them. Avoid putting
   // them at index 0, as an option like '-cc1' must remain the first.
@@ -419,12 +291,13 @@ int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContext) {
     }
   }
 
-  std::set<std::string> SavedStrings;
+  llvm::StringSet<> SavedStrings;
   // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the
   // scenes.
   if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) {
     // FIXME: Driver shouldn't take extra initial argument.
-    ApplyQAOverride(Args, OverrideStr, SavedStrings);
+    driver::applyOverrideOptions(Args, OverrideStr, SavedStrings,
+                                 &llvm::errs());
   }
 
   std::string Path = GetExecutablePath(ToolContext.Path, CanonicalPrefixes);
diff --git a/clang/unittests/Format/.clang-format b/clang/unittests/Format/.clang-format
index f95602cab0f7..d7331b3c8cf0 100644
--- a/clang/unittests/Format/.clang-format
+++ b/clang/unittests/Format/.clang-format
@@ -1,6 +1 @@
-BasedOnStyle: LLVM
-InsertBraces: true
-InsertNewlineAtEOF: true
-LineEnding: LF
-RemoveBracesLLVM: true
-RemoveParentheses: ReturnStatement
+BasedOnStyle: clang-format
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index fc367a7a5a89..add92d3e1110 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -3802,6 +3802,27 @@ TEST_F(FormatTest, FormatsEnum) {
                "  // Comment 2\n"
                "  TWO,\n"
                "};");
+  verifyFormat("enum [[clang::enum_extensibility(open)]] E {\n"
+               "  // Comment 1\n"
+               "  ONE,\n"
+               "  // Comment 2\n"
+               "  TWO\n"
+               "};");
+  verifyFormat("enum [[nodiscard]] [[clang::enum_extensibility(open)]] E {\n"
+               "  // Comment 1\n"
+               "  ONE,\n"
+               "  // Comment 2\n"
+               "  TWO\n"
+               "};");
+  verifyFormat("enum [[clang::enum_extensibility(open)]] E { // foo\n"
+               "  A,\n"
+               "  // bar\n"
+               "  B\n"
+               "};",
+               "enum [[clang::enum_extensibility(open)]] E{// foo\n"
+               "                                           A,\n"
+               "                                           // bar\n"
+               "                                           B};");
 
   // Not enums.
   verifyFormat("enum X f() {\n"
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 21c18a03a4fc..b30ea64201bf 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2645,6 +2645,11 @@ TEST_F(TokenAnnotatorTest, StartOfName) {
   EXPECT_TOKEN(Tokens[2], tok::identifier, TT_Unknown);
   EXPECT_TOKEN(Tokens[3], tok::identifier, TT_Unknown);
   EXPECT_TOKEN(Tokens[4], tok::identifier, TT_StartOfName);
+
+  Tokens = annotate("@interface NSCoder (TestCoder)");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::at, TT_ObjCDecl);
+  EXPECT_TOKEN(Tokens[2], tok::identifier, TT_StartOfName);
 }
 
 TEST_F(TokenAnnotatorTest, BraceKind) {
diff --git a/compiler-rt/test/tsan/signal_reset.cpp b/compiler-rt/test/tsan/signal_reset.cpp
index 82758d882382..d76b7e5f3b5f 100644
--- a/compiler-rt/test/tsan/signal_reset.cpp
+++ b/compiler-rt/test/tsan/signal_reset.cpp
@@ -28,12 +28,12 @@ static void* reset(void *p) {
   struct sigaction act = {};
   for (int i = 0; i < 1000000; i++) {
     act.sa_handler = &handler;
-    if (sigaction(SIGPROF, &act, 0)) {
+    if (sigaction(SIGALRM, &act, 0)) {
       perror("sigaction");
       exit(1);
     }
     act.sa_handler = SIG_IGN;
-    if (sigaction(SIGPROF, &act, 0)) {
+    if (sigaction(SIGALRM, &act, 0)) {
       perror("sigaction");
       exit(1);
     }
@@ -44,7 +44,7 @@ static void* reset(void *p) {
 int main() {
   struct sigaction act = {};
   act.sa_handler = SIG_IGN;
-  if (sigaction(SIGPROF, &act, 0)) {
+  if (sigaction(SIGALRM, &act, 0)) {
     perror("sigaction");
     exit(1);
   }
@@ -53,7 +53,7 @@ int main() {
   t.it_value.tv_sec = 0;
   t.it_value.tv_usec = 10;
   t.it_interval = t.it_value;
-  if (setitimer(ITIMER_PROF, &t, 0)) {
+  if (setitimer(ITIMER_REAL, &t, 0)) {
     perror("setitimer");
     exit(1);
   }
diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td
index d24c92e9e590..283647260779 100644
--- a/libc/config/baremetal/api.td
+++ b/libc/config/baremetal/api.td
@@ -68,7 +68,7 @@ def StdlibAPI : PublicAPI<"stdlib.h"> {
     "size_t",
     "__bsearchcompare_t",
     "__qsortcompare_t",
-    "__qsortrcompare_t",
+    "__atexithandler_t",
   ];
 }
 
diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst
index dab21e1324d2..6d94134a407d 100644
--- a/libc/docs/gpu/building.rst
+++ b/libc/docs/gpu/building.rst
@@ -220,11 +220,15 @@ targets. This section will briefly describe their purpose.
   be used to enable host services for anyone looking to interface with the
   :ref:`RPC client<libc_gpu_rpc>`.
 
+.. _gpu_cmake_options:
+
 CMake options
 =============
 
 This section briefly lists a few of the CMake variables that specifically
-control the GPU build of the C library.
+control the GPU build of the C library. These options can be passed individually
+to each target using ``-DRUNTIMES_<target>_<variable>=<value>`` when using a
+standard runtime build.
 
 **LLVM_LIBC_FULL_BUILD**:BOOL
   This flag controls whether or not the libc build will generate its own
diff --git a/libc/docs/gpu/testing.rst b/libc/docs/gpu/testing.rst
index 9842a6752836..9f17159fb6d5 100644
--- a/libc/docs/gpu/testing.rst
+++ b/libc/docs/gpu/testing.rst
@@ -1,9 +1,9 @@
 .. _libc_gpu_testing:
 
 
-============================
-Testing the GPU libc library
-============================
+=========================
+Testing the GPU C library
+=========================
 
 .. note::
    Running GPU tests with high parallelism is likely to cause spurious failures,
@@ -14,24 +14,134 @@ Testing the GPU libc library
   :depth: 4
   :local:
 
-Testing Infrastructure
+Testing infrastructure
 ======================
 
-The testing support in LLVM's libc implementation for GPUs is designed to mimic
-the standard unit tests as much as possible. We use the :ref:`libc_gpu_rpc`
-support to provide the necessary utilities like printing from the GPU. Execution
-is performed by emitting a ``_start`` kernel from the GPU
-that is then called by an external loader utility. This is an example of how
-this can be done manually:
+The LLVM C library supports different kinds of :ref:`tests <build_and_test>`
+depending on the build configuration. The GPU target is considered a full build
+and therefore provides all of its own utilities to build and run the generated
+tests. Currently the GPU supports two kinds of tests.
+
+#. **Hermetic tests** - These are unit tests built with a test suite similar to
+   Google's ``gtest`` infrastructure. These use the same infrastructure as unit
+   tests except that the entire environment is self-hosted. This allows us to
+   run them on the GPU using our custom utilities. These are used to test the
+   majority of functional implementations.
+
+#. **Integration tests** - These are lightweight tests that simply call a
+   ``main`` function and checks if it returns non-zero. These are primarily used
+   to test interfaces that are sensitive to threading.
+
+The GPU uses the same testing infrastructure as the other supported ``libc``
+targets. We do this by treating the GPU as a standard hosted environment capable
+of launching a ``main`` function. Effectively, this means building our own
+startup libraries and loader.
+
+Testing utilities
+=================
+
+We provide two utilities to execute arbitrary programs on the GPU. That is the
+``loader`` and the ``start`` object.
+
+Startup object
+--------------
+
+This object mimics the standard object used by existing C library
+implementations. Its job is to perform the necessary setup prior to calling the
+``main`` function. In the GPU case, this means exporting GPU kernels that will
+perform the necessary operations. Here we use ``_begin`` and ``_end`` to handle
+calling global constructors and destructors while ``_start`` begins the standard
+execution. The following code block shows the implementation for AMDGPU
+architectures.
+
+.. code-block:: c++
+
+  extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
+  _begin(int argc, char **argv, char **env) {
+    LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
+    LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env);
+  }
+
+  extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
+  _start(int argc, char **argv, char **envp, int *ret) {
+    __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
+  }
+
+  extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
+  _end(int retval) {
+    LIBC_NAMESPACE::exit(retval);
+  }
+
+Loader runtime
+--------------
+
+The startup object provides a GPU executable with callable kernels for the
+respective runtime. We can then define a minimal runtime that will launch these
+kernels on the given device. Currently we provide the ``amdhsa-loader`` and
+``nvptx-loader`` targeting the AMD HSA runtime and CUDA driver runtime
+respectively. By default these will launch with a single thread on the GPU.
 
 .. code-block:: sh
 
-   $> clang++ crt1.o test.cpp --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto
-   $> ./amdhsa_loader --threads 1 --blocks 1 a.out
+   $> clang++ crt1.o test.cpp --target=amdgcn-amd-amdhsa -mcpu=native -flto
+   $> amdhsa_loader --threads 1 --blocks 1 ./a.out
    Test Passed!
 
-Unlike the exported ``libcgpu.a``, the testing architecture can only support a
-single architecture at a time. This is either detected automatically, or set
-manually by the user using ``LIBC_GPU_TEST_ARCHITECTURE``. The latter is useful
-in cases where the user does not build LLVM's libc on machine with the GPU to
-use for testing.
+The loader utility will forward any arguments passed after the executable image
+to the program on the GPU as well as any set environment variables. The number
+of threads and blocks to be set can be controlled with ``--threads`` and
+``--blocks``. These also accept additional ``x``, ``y``, ``z`` variants for
+multidimensional grids.
+
+Running tests
+=============
+
+Tests will only be built and run if a GPU target architecture is set and the
+corresponding loader utility was built. These can be overridden with the
+``LIBC_GPU_TEST_ARCHITECTURE`` and ``LIBC_GPU_LOADER_EXECUTABLE`` :ref:`CMake
+options <gpu_cmake_options>`. Once built, they can be run like any other tests.
+The CMake target depends on how the library was built.
+
+#. **Cross build** - If the C library was built using ``LLVM_ENABLE_PROJECTS``
+   or a runtimes cross build, then the standard targets will be present in the
+   base CMake build directory.
+
+   #. All tests - You can run all supported tests with the command:
+
+      .. code-block:: sh
+
+        $> ninja check-libc
+
+   #. Hermetic tests - You can run hermetic with tests the command:
+
+      .. code-block:: sh
+
+        $> ninja libc-hermetic-tests
+
+   #. Integration tests - You can run integration tests by the command:
+
+      .. code-block:: sh
+
+        $> ninja libc-integration-tests
+
+#. **Runtimes build** - If the library was built using ``LLVM_ENABLE_RUNTIMES``
+   then the actual ``libc`` build will be in a separate directory.
+
+   #. All tests - You can run all supported tests with the command:
+
+      .. code-block:: sh
+
+        $> ninja check-libc-amdgcn-amd-amdhsa
+        $> ninja check-libc-nvptx64-nvidia-cuda
+
+   #. Specific tests - You can use the same targets as above by entering the
+      runtimes build directory.
+
+      .. code-block:: sh
+
+        $> ninja -C runtimes/runtimes-amdgcn-amd-amdhsa-bins check-libc
+        $> ninja -C runtimes/runtimes-nvptx64-nvidia-cuda-bins check-libc
+        $> cd runtimes/runtimes-amdgcn-amd-amdhsa-bins && ninja check-libc
+        $> cd runtimes/runtimes-nvptx64-nvidia-cuda-bins && ninja check-libc
+
+Tests can also be built and run manually using the respective loader utility.
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
index 11a00cd620d8..1a9446eeb113 100644
--- a/libc/docs/gpu/using.rst
+++ b/libc/docs/gpu/using.rst
@@ -159,17 +159,21 @@ GPUs.
   }
 
 We can then compile this for both NVPTX and AMDGPU into LLVM-IR using the
-following commands.
+following commands. This will yield valid LLVM-IR for the given target just like
+if we were using CUDA, OpenCL, or OpenMP.
 
 .. code-block:: sh
 
   $> clang id.c --target=amdgcn-amd-amdhsa -mcpu=native -nogpulib -flto -c
   $> clang id.c --target=nvptx64-nvidia-cuda -march=native -nogpulib -flto -c
 
-We use this support to treat the GPU as a hosted environment by providing a C
-library and startup object just like a standard C library running on the host
-machine. Then, in order to execute these programs, we provide a loader utility
-to launch the executable on the GPU similar to a cross-compiling emulator.
+We can also use this support to treat the GPU as a hosted environment by
+providing a C library and startup object just like a standard C library running
+on the host machine. Then, in order to execute these programs, we provide a
+loader utility to launch the executable on the GPU similar to a cross-compiling
+emulator. This is how we run :ref:`unit tests <libc_gpu_testing>` targeting the
+GPU. This is clearly not the most efficient way to use a GPU, but it provides a
+simple method to test execution on a GPU for debugging or development.
 
 Building for AMDGPU targets
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv
index 56e1468b4ca1..80547c5c1f3f 100644
--- a/libcxx/docs/Status/Cxx23Papers.csv
+++ b/libcxx/docs/Status/Cxx23Papers.csv
@@ -100,7 +100,7 @@
 "`P2396R1 <https://wg21.link/P2396R1>`__","LWG", "Concurrency TS 2 fixes ", "November 2022","","","|concurrency TS|"
 "`P2505R5 <https://wg21.link/P2505R5>`__","LWG", "Monadic Functions for ``std::expected``", "November 2022","|Complete|","17.0",""
 "`P2539R4 <https://wg21.link/P2539R4>`__","LWG", "Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?", "November 2022","|Complete|","18.0","|format|"
-"`P2602R2 <https://wg21.link/P2602R2>`__","LWG", "Poison Pills are Too Toxic", "November 2022","","","|ranges|"
+"`P2602R2 <https://wg21.link/P2602R2>`__","LWG", "Poison Pills are Too Toxic", "November 2022","|Complete|","19.0","|ranges|"
 "`P2708R1 <https://wg21.link/P2708R1>`__","LWG", "No Further Fundamentals TSes", "November 2022","|Nothing to do|","",""
 "","","","","","",""
 "`P0290R4 <https://wg21.link/P0290R4>`__","LWG", "``apply()`` for ``synchronized_value<T>``","February 2023","","","|concurrency TS|"
diff --git a/libcxx/include/__compare/partial_order.h b/libcxx/include/__compare/partial_order.h
index f3ed4900fbff..1d2fae63e5f2 100644
--- a/libcxx/include/__compare/partial_order.h
+++ b/libcxx/include/__compare/partial_order.h
@@ -28,6 +28,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // [cmp.alg]
 namespace __partial_order {
+void partial_order() = delete;
+
 struct __fn {
   // NOLINTBEGIN(libcpp-robust-against-adl) partial_order should use ADL, but only here
   template <class _Tp, class _Up>
diff --git a/libcxx/include/__compare/strong_order.h b/libcxx/include/__compare/strong_order.h
index 3dc819e64251..8c363b563822 100644
--- a/libcxx/include/__compare/strong_order.h
+++ b/libcxx/include/__compare/strong_order.h
@@ -37,6 +37,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // [cmp.alg]
 namespace __strong_order {
+void strong_order() = delete;
+
 struct __fn {
   // NOLINTBEGIN(libcpp-robust-against-adl) strong_order should use ADL, but only here
   template <class _Tp, class _Up>
diff --git a/libcxx/include/__compare/weak_order.h b/libcxx/include/__compare/weak_order.h
index b82a708c29a1..1a3e85feb233 100644
--- a/libcxx/include/__compare/weak_order.h
+++ b/libcxx/include/__compare/weak_order.h
@@ -30,6 +30,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // [cmp.alg]
 namespace __weak_order {
+void weak_order() = delete;
+
 struct __fn {
   // NOLINTBEGIN(libcpp-robust-against-adl) weak_order should use ADL, but only here
   template <class _Tp, class _Up>
diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h
index d131e942aca6..bf603c5c62d9 100644
--- a/libcxx/include/__format/format_context.h
+++ b/libcxx/include/__format/format_context.h
@@ -27,7 +27,7 @@
 #include <cstddef>
 
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
-#  include <locale>
+#  include <__locale>
 #  include <optional>
 #endif
 
diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h
index 3ee53539f4ee..c7810140105a 100644
--- a/libcxx/include/__format/format_functions.h
+++ b/libcxx/include/__format/format_functions.h
@@ -41,7 +41,7 @@
 #include <string_view>
 
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
-#  include <locale>
+#  include <__locale>
 #endif
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
index f01d323efff5..1d94cc349c0d 100644
--- a/libcxx/include/__format/formatter_floating_point.h
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -39,7 +39,7 @@
 #include <cstddef>
 
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
-#  include <locale>
+#  include <__locale>
 #endif
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__iterator/iter_move.h b/libcxx/include/__iterator/iter_move.h
index 202b94cccc5a..ba8aed3c0ffb 100644
--- a/libcxx/include/__iterator/iter_move.h
+++ b/libcxx/include/__iterator/iter_move.h
@@ -35,7 +35,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 namespace ranges {
 namespace __iter_move {
 
-void iter_move();
+void iter_move() = delete;
 
 template <class _Tp>
 concept __unqualified_iter_move = __class_or_enum<remove_cvref_t<_Tp>> && requires(_Tp&& __t) {
diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h
index 263fdd637fd9..218b3928ecdc 100644
--- a/libcxx/include/__ranges/access.h
+++ b/libcxx/include/__ranges/access.h
@@ -45,8 +45,7 @@ concept __member_begin = __can_borrow<_Tp> && __workaround_52970<_Tp> && require
   { _LIBCPP_AUTO_CAST(__t.begin()) } -> input_or_output_iterator;
 };
 
-void begin(auto&)       = delete;
-void begin(const auto&) = delete;
+void begin() = delete;
 
 template <class _Tp>
 concept __unqualified_begin =
@@ -109,8 +108,7 @@ concept __member_end = __can_borrow<_Tp> && __workaround_52970<_Tp> && requires(
   { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for<iterator_t<_Tp>>;
 };
 
-void end(auto&)       = delete;
-void end(const auto&) = delete;
+void end() = delete;
 
 template <class _Tp>
 concept __unqualified_end =
diff --git a/libcxx/include/__ranges/rbegin.h b/libcxx/include/__ranges/rbegin.h
index 7111201ae7d6..947e428f00cd 100644
--- a/libcxx/include/__ranges/rbegin.h
+++ b/libcxx/include/__ranges/rbegin.h
@@ -40,8 +40,7 @@ concept __member_rbegin = __can_borrow<_Tp> && __workaround_52970<_Tp> && requir
   { _LIBCPP_AUTO_CAST(__t.rbegin()) } -> input_or_output_iterator;
 };
 
-void rbegin(auto&)       = delete;
-void rbegin(const auto&) = delete;
+void rbegin() = delete;
 
 template <class _Tp>
 concept __unqualified_rbegin =
diff --git a/libcxx/include/__ranges/rend.h b/libcxx/include/__ranges/rend.h
index 58d98aafd264..1b6be58fea24 100644
--- a/libcxx/include/__ranges/rend.h
+++ b/libcxx/include/__ranges/rend.h
@@ -42,8 +42,7 @@ concept __member_rend = __can_borrow<_Tp> && __workaround_52970<_Tp> && requires
   { _LIBCPP_AUTO_CAST(__t.rend()) } -> sentinel_for<decltype(ranges::rbegin(__t))>;
 };
 
-void rend(auto&)       = delete;
-void rend(const auto&) = delete;
+void rend() = delete;
 
 template <class _Tp>
 concept __unqualified_rend =
diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h
index 14e21aae6bf1..59ca2b11ee38 100644
--- a/libcxx/include/__ranges/size.h
+++ b/libcxx/include/__ranges/size.h
@@ -41,8 +41,7 @@ inline constexpr bool disable_sized_range = false;
 
 namespace ranges {
 namespace __size {
-void size(auto&)       = delete;
-void size(const auto&) = delete;
+void size() = delete;
 
 template <class _Tp>
 concept __size_enabled = !disable_sized_range<remove_cvref_t<_Tp>>;
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 0320c1dc4c2f..5bab3f8ad5cf 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -883,6 +883,7 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 
 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER == 20
 #  include <charconv>
+#  include <locale>
 #endif
 
 #endif // _LIBCPP_CHRONO
diff --git a/libcxx/include/complex b/libcxx/include/complex
index e996485a38ae..a81f968143c4 100644
--- a/libcxx/include/complex
+++ b/libcxx/include/complex
@@ -258,6 +258,7 @@ template<class T> complex<T> tanh (const complex<T>&);
 
 #include <__config>
 #include <__fwd/complex.h>
+#include <__fwd/tuple.h>
 #include <__tuple/tuple_element.h>
 #include <__tuple/tuple_size.h>
 #include <__utility/move.h>
@@ -1444,15 +1445,9 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const complex<_Tp>& __x) {
 // [complex.tuple], tuple interface
 
 template <class _Tp>
-struct tuple_size;
-
-template <class _Tp>
 struct tuple_size<complex<_Tp>> : integral_constant<size_t, 2> {};
 
 template <size_t _Ip, class _Tp>
-struct tuple_element;
-
-template <size_t _Ip, class _Tp>
 struct tuple_element<_Ip, complex<_Tp>> {
   static_assert(_Ip < 2, "Index value is out of range.");
   using type = _Tp;
diff --git a/libcxx/include/format b/libcxx/include/format
index c0485c5a1035..146613464534 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -221,4 +221,8 @@ namespace std {
 #  pragma GCC system_header
 #endif
 
+#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
+#  include <locale>
+#endif
+
 #endif // _LIBCPP_FORMAT
diff --git a/libcxx/include/mutex b/libcxx/include/mutex
index ea56e3051908..12fae9a88b9d 100644
--- a/libcxx/include/mutex
+++ b/libcxx/include/mutex
@@ -418,24 +418,6 @@ inline _LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&...
   std::__lock_first(0, __l0, __l1, __l2, __l3...);
 }
 
-template <class _L0>
-inline _LIBCPP_HIDE_FROM_ABI void __unlock(_L0& __l0) {
-  __l0.unlock();
-}
-
-template <class _L0, class _L1>
-inline _LIBCPP_HIDE_FROM_ABI void __unlock(_L0& __l0, _L1& __l1) {
-  __l0.unlock();
-  __l1.unlock();
-}
-
-template <class _L0, class _L1, class _L2, class... _L3>
-inline _LIBCPP_HIDE_FROM_ABI void __unlock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
-  __l0.unlock();
-  __l1.unlock();
-  std::__unlock(__l2, __l3...);
-}
-
 #  endif // _LIBCPP_CXX03_LANG
 
 #  if _LIBCPP_STD_VER >= 17
@@ -498,7 +480,7 @@ public:
 private:
   template <size_t... _Indx>
   _LIBCPP_HIDE_FROM_ABI static void __unlock_unpack(__tuple_indices<_Indx...>, _MutexTuple& __mt) {
-    std::__unlock(std::get<_Indx>(__mt)...);
+    (std::get<_Indx>(__mt).unlock(), ...);
   }
 
   _MutexTuple __t_;
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index 678a986e522a..c65b9b9d705e 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -266,9 +266,14 @@ filesystem system_error
 filesystem type_traits
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
 format locale
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index c3875fa2cfc0..b3d9e327fc7a 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -267,9 +267,14 @@ filesystem system_error
 filesystem type_traits
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
 format locale
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index e28e0cd44fed..d723409422a3 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -269,9 +269,14 @@ filesystem system_error
 filesystem type_traits
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
 format locale
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index e28e0cd44fed..d723409422a3 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -269,9 +269,14 @@ filesystem system_error
 filesystem type_traits
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
 format locale
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index eec71f4fc628..03b4eda8b4d8 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -113,14 +113,19 @@ charconv type_traits
 charconv version
 chrono array
 chrono bit
+chrono cctype
+chrono cerrno
 chrono charconv
+chrono clocale
 chrono cmath
 chrono compare
 chrono concepts
 chrono cstddef
 chrono cstdint
+chrono cstdlib
 chrono cstring
 chrono ctime
+chrono cwchar
 chrono forward_list
 chrono limits
 chrono locale
@@ -275,9 +280,14 @@ filesystem system_error
 filesystem type_traits
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
 format locale
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index ccba100af20e..8150f0935900 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -68,15 +68,20 @@ charconv limits
 charconv new
 charconv version
 chrono array
+chrono cctype
+chrono cerrno
+chrono clocale
 chrono cmath
 chrono compare
 chrono cstddef
 chrono cstdint
+chrono cstdlib
+chrono cstring
 chrono ctime
+chrono cwchar
 chrono forward_list
 chrono initializer_list
 chrono limits
-chrono locale
 chrono new
 chrono optional
 chrono ostream
@@ -85,6 +90,8 @@ chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
+chrono tuple
+chrono typeinfo
 chrono vector
 chrono version
 cinttypes cstdint
@@ -184,12 +191,16 @@ filesystem string
 filesystem string_view
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
-format locale
 format new
 format optional
 format queue
@@ -198,6 +209,7 @@ format stdexcept
 format string
 format string_view
 format tuple
+format typeinfo
 format version
 forward_list compare
 forward_list cstddef
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index ccba100af20e..8150f0935900 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -68,15 +68,20 @@ charconv limits
 charconv new
 charconv version
 chrono array
+chrono cctype
+chrono cerrno
+chrono clocale
 chrono cmath
 chrono compare
 chrono cstddef
 chrono cstdint
+chrono cstdlib
+chrono cstring
 chrono ctime
+chrono cwchar
 chrono forward_list
 chrono initializer_list
 chrono limits
-chrono locale
 chrono new
 chrono optional
 chrono ostream
@@ -85,6 +90,8 @@ chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
+chrono tuple
+chrono typeinfo
 chrono vector
 chrono version
 cinttypes cstdint
@@ -184,12 +191,16 @@ filesystem string
 filesystem string_view
 filesystem version
 format array
+format cctype
+format clocale
 format cmath
 format cstddef
 format cstdint
+format cstdlib
+format cstring
+format cwchar
 format initializer_list
 format limits
-format locale
 format new
 format optional
 format queue
@@ -198,6 +209,7 @@ format stdexcept
 format string
 format string_view
 format tuple
+format typeinfo
 format version
 forward_list compare
 forward_list cstddef
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp
index d4bbde75ae88..068202c6e415 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp
@@ -44,19 +44,14 @@ int main(int, char**)
         errno = E2BIG; // something that message will never generate
         const std::error_category& e_cat1 = std::generic_category();
         const std::string msg = e_cat1.message(-1);
-        // Exact message format varies by platform.  We can't detect
-        // some of these (Musl in particular) using the preprocessor,
-        // so accept a few sensible messages.  Newlib unfortunately
-        // responds with an empty message, which we probably want to
-        // treat as a failure code otherwise, but we can detect that
-        // with the preprocessor.
-        LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0       // AIX
-                      || msg.rfind("No error information", 0) == 0 // Musl
-                      || msg.rfind("Unknown error", 0) == 0        // Glibc
-#if defined(_NEWLIB_VERSION)
-                      || msg.empty()
+        // Exact message format varies by platform.
+#if defined(_AIX)
+        LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0);
+#elif defined(_NEWLIB_VERSION)
+        LIBCPP_ASSERT(msg.empty());
+#else
+        LIBCPP_ASSERT(msg.rfind("Unknown error", 0) == 0);
 #endif
-        );
         assert(errno == E2BIG);
     }
 
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp
index eefbddd27a7f..42fdd1cb3b91 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp
@@ -50,19 +50,14 @@ int main(int, char**) {
     errno                             = E2BIG; // something that message will never generate
     const std::error_category& e_cat1 = std::system_category();
     const std::string msg             = e_cat1.message(-1);
-    // Exact message format varies by platform.  We can't detect
-    // some of these (Musl in particular) using the preprocessor,
-    // so accept a few sensible messages.  Newlib unfortunately
-    // responds with an empty message, which we probably want to
-    // treat as a failure code otherwise, but we can detect that
-    // with the preprocessor.
-    LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0       // AIX
-                  || msg.rfind("No error information", 0) == 0 // Musl
-                  || msg.rfind("Unknown error", 0) == 0        // Glibc
-#if defined(_NEWLIB_VERSION)
-                  || msg.empty()
+    // Exact message format varies by platform.
+#if defined(_AIX)
+    LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0);
+#elif defined(_NEWLIB_VERSION)
+    LIBCPP_ASSERT(msg.empty());
+#else
+    LIBCPP_ASSERT(msg.rfind("Unknown error", 0) == 0);
 #endif
-    );
     assert(errno == E2BIG);
   }
 
diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
index dd78707f2540..e6507f7e7767 100644
--- a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
@@ -47,6 +47,13 @@ static_assert( std::is_invocable_v<IterSwapT&&, HasIterSwap&, HasIterSwap&>);
 static_assert( std::is_invocable_v<IterSwapT&&, HasIterSwap&, int&>);
 static_assert(!std::is_invocable_v<IterSwapT&&, int&, HasIterSwap&>);
 
+struct StructWithNotMoreSpecializedIterSwap {
+  friend void iter_swap(auto&, auto&);
+};
+
+static_assert(
+    !std::is_invocable_v<IterSwapT, StructWithNotMoreSpecializedIterSwap&, StructWithNotMoreSpecializedIterSwap&>);
+
 struct NodiscardIterSwap {
   [[nodiscard]] friend int iter_swap(NodiscardIterSwap&, NodiscardIterSwap&) { return 0; }
 };
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
index 0258ebf87243..8637a933008f 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
@@ -22,7 +22,6 @@
 #include <locale>
 #include <ios>
 #include <cassert>
-#include <cstdio>
 #include <streambuf>
 #include <cmath>
 #include "test_macros.h"
@@ -8935,12 +8934,11 @@ void test4()
     char str[200];
     std::locale lc = std::locale::classic();
     std::locale lg(lc, new my_numpunct);
-
-    std::string inf;
-
-    // This should match the underlying C library
-    std::sprintf(str, "%f", INFINITY);
-    inf = str;
+#ifdef _AIX
+    std::string inf = "INF";
+#else
+    std::string inf = "inf";
+#endif
 
     const my_facet f(1);
     {
@@ -10729,27 +10727,24 @@ void test5()
     std::locale lc = std::locale::classic();
     std::locale lg(lc, new my_numpunct);
     const my_facet f(1);
-
-    std::string nan;
-    std::string NaN;
-    std::string pnan_sign;
-
-    // The output here depends on the underlying C library, so work out what
-    // that does.
-    std::sprintf(str, "%f", std::nan(""));
-    nan = str;
-
-    std::sprintf(str, "%F", std::nan(""));
-    NaN = str;
-
-    std::sprintf(str, "%+f", std::nan(""));
-    if (str[0] == '+') {
-      pnan_sign = "+";
-    }
-
-    std::string nan_padding25  = std::string(25 - nan.length(), '*');
-    std::string pnan_padding25 = std::string(25 - nan.length() - pnan_sign.length(), '*');
-
+#if defined(_AIX)
+    std::string nan= "NaNQ";
+    std::string NaN = "NaNQ";
+    std::string nan_padding25 = "*********************";
+    std::string pnan_sign = "+";
+    std::string pnan_padding25 = "********************";
+#else
+    std::string nan= "nan";
+    std::string NaN = "NAN";
+    std::string nan_padding25 = "**********************";
+#if defined(TEST_HAS_GLIBC) || defined(_WIN32)
+    std::string pnan_sign = "+";
+    std::string pnan_padding25 = "*********************";
+#else
+    std::string pnan_sign = "";
+    std::string pnan_padding25 = "**********************";
+#endif
+#endif
     {
         long double v = std::nan("");
         std::ios ios(0);
diff --git a/libcxx/test/std/ranges/range.access/begin.pass.cpp b/libcxx/test/std/ranges/range.access/begin.pass.cpp
index ca25fc297a01..5ca3d59abb14 100644
--- a/libcxx/test/std/ranges/range.access/begin.pass.cpp
+++ b/libcxx/test/std/ranges/range.access/begin.pass.cpp
@@ -192,7 +192,7 @@ struct BeginFunction {
 };
 static_assert( std::is_invocable_v<RangeBeginT,  BeginFunction const&>);
 static_assert(!std::is_invocable_v<RangeBeginT,  BeginFunction &&>);
-static_assert(!std::is_invocable_v<RangeBeginT,  BeginFunction &>);
+static_assert(std::is_invocable_v<RangeBeginT, BeginFunction&>); // Ill-formed before P2602R2 Poison Pills are Too Toxic
 static_assert( std::is_invocable_v<RangeCBeginT, BeginFunction const&>);
 static_assert( std::is_invocable_v<RangeCBeginT, BeginFunction &>);
 
@@ -245,7 +245,7 @@ private:
 constexpr bool testBeginFunction() {
   BeginFunction a{};
   const BeginFunction aa{};
-  static_assert(!std::invocable<RangeBeginT, decltype((a))>);
+  assert(std::ranges::begin(a) == &a.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cbegin(a) == &a.x);
   assert(std::ranges::begin(aa) == &aa.x);
   assert(std::ranges::cbegin(aa) == &aa.x);
@@ -266,21 +266,21 @@ constexpr bool testBeginFunction() {
 
   BeginFunctionReturnsEmptyPtr d{};
   const BeginFunctionReturnsEmptyPtr dd{};
-  static_assert(!std::invocable<RangeBeginT, decltype((d))>);
+  assert(std::ranges::begin(d) == &d.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cbegin(d) == &d.x);
   assert(std::ranges::begin(dd) == &dd.x);
   assert(std::ranges::cbegin(dd) == &dd.x);
 
   BeginFunctionWithDataMember e{};
   const BeginFunctionWithDataMember ee{};
-  static_assert(!std::invocable<RangeBeginT, decltype((e))>);
+  assert(std::ranges::begin(e) == &e.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::begin(ee) == &ee.x);
   assert(std::ranges::cbegin(e) == &e.x);
   assert(std::ranges::cbegin(ee) == &ee.x);
 
   BeginFunctionWithPrivateBeginMember f{};
   const BeginFunctionWithPrivateBeginMember ff{};
-  static_assert(!std::invocable<RangeBeginT, decltype((f))>);
+  assert(std::ranges::begin(f) == &f.y); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cbegin(f) == &f.y);
   assert(std::ranges::begin(ff) == &ff.y);
   assert(std::ranges::cbegin(ff) == &ff.y);
diff --git a/libcxx/test/std/ranges/range.access/end.pass.cpp b/libcxx/test/std/ranges/range.access/end.pass.cpp
index 21d97354ef08..3e465b357e98 100644
--- a/libcxx/test/std/ranges/range.access/end.pass.cpp
+++ b/libcxx/test/std/ranges/range.access/end.pass.cpp
@@ -193,7 +193,7 @@ static_assert(!std::is_invocable_v<RangeEndT, EndFunction &&>);
 
 static_assert( std::is_invocable_v<RangeEndT,  EndFunction const&>);
 static_assert(!std::is_invocable_v<RangeEndT,  EndFunction &&>);
-static_assert(!std::is_invocable_v<RangeEndT,  EndFunction &>);
+static_assert(std::is_invocable_v<RangeEndT, EndFunction&>); // Ill-formed before P2602R2 Poison Pills are Too Toxic
 static_assert( std::is_invocable_v<RangeCEndT, EndFunction const&>);
 static_assert( std::is_invocable_v<RangeCEndT, EndFunction &>);
 
@@ -271,7 +271,7 @@ constexpr bool testEndFunction() {
   assert(std::ranges::end(a) == &a.x);
   assert(std::ranges::cend(a) == &a.x);
   EndFunction aa{};
-  static_assert(!std::is_invocable_v<RangeEndT, decltype((aa))>);
+  assert(std::ranges::end(aa) == &aa.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cend(aa) == &aa.x);
 
   EndFunctionByValue b;
@@ -286,28 +286,28 @@ constexpr bool testEndFunction() {
   assert(std::ranges::end(d) == &d.x);
   assert(std::ranges::cend(d) == &d.x);
   EndFunctionReturnsEmptyPtr dd{};
-  static_assert(!std::is_invocable_v<RangeEndT, decltype((dd))>);
+  assert(std::ranges::end(dd) == &dd.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cend(dd) == &dd.x);
 
   const EndFunctionWithDataMember e{};
   assert(std::ranges::end(e) == &e.x);
   assert(std::ranges::cend(e) == &e.x);
   EndFunctionWithDataMember ee{};
-  static_assert(!std::is_invocable_v<RangeEndT, decltype((ee))>);
+  assert(std::ranges::end(ee) == &ee.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cend(ee) == &ee.x);
 
   const EndFunctionWithPrivateEndMember f{};
   assert(std::ranges::end(f) == &f.y);
   assert(std::ranges::cend(f) == &f.y);
   EndFunctionWithPrivateEndMember ff{};
-  static_assert(!std::is_invocable_v<RangeEndT, decltype((ff))>);
+  assert(std::ranges::end(ff) == &ff.y); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cend(ff) == &ff.y);
 
   const BeginMemberEndFunction g{};
   assert(std::ranges::end(g) == &g.x);
   assert(std::ranges::cend(g) == &g.x);
   BeginMemberEndFunction gg{};
-  static_assert(!std::is_invocable_v<RangeEndT, decltype((gg))>);
+  assert(std::ranges::end(gg) == &gg.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::cend(gg) == &gg.x);
 
   return true;
diff --git a/libcxx/test/std/ranges/range.access/rbegin.pass.cpp b/libcxx/test/std/ranges/range.access/rbegin.pass.cpp
index e1a564c94ed9..3997f38efd02 100644
--- a/libcxx/test/std/ranges/range.access/rbegin.pass.cpp
+++ b/libcxx/test/std/ranges/range.access/rbegin.pass.cpp
@@ -187,7 +187,8 @@ struct RBeginFunction {
 };
 static_assert( std::is_invocable_v<RangeRBeginT,  RBeginFunction const&>);
 static_assert(!std::is_invocable_v<RangeRBeginT,  RBeginFunction &&>);
-static_assert(!std::is_invocable_v<RangeRBeginT,  RBeginFunction &>);
+static_assert(
+    std::is_invocable_v<RangeRBeginT, RBeginFunction&>); // Ill-formed before P2602R2 Poison Pills are Too Toxic
 static_assert( std::is_invocable_v<RangeCRBeginT, RBeginFunction const&>);
 static_assert( std::is_invocable_v<RangeCRBeginT, RBeginFunction &>);
 
@@ -246,7 +247,7 @@ private:
 constexpr bool testRBeginFunction() {
   RBeginFunction a{};
   const RBeginFunction aa{};
-  static_assert(!std::invocable<RangeRBeginT, decltype((a))>);
+  assert(std::ranges::rbegin(a) == &a.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crbegin(a) == &a.x);
   assert(std::ranges::rbegin(aa) == &aa.x);
   assert(std::ranges::crbegin(aa) == &aa.x);
@@ -267,21 +268,21 @@ constexpr bool testRBeginFunction() {
 
   RBeginFunctionReturnsEmptyPtr d{};
   const RBeginFunctionReturnsEmptyPtr dd{};
-  static_assert(!std::invocable<RangeRBeginT, decltype((d))>);
+  assert(std::ranges::rbegin(d) == &d.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crbegin(d) == &d.x);
   assert(std::ranges::rbegin(dd) == &dd.x);
   assert(std::ranges::crbegin(dd) == &dd.x);
 
   RBeginFunctionWithDataMember e{};
   const RBeginFunctionWithDataMember ee{};
-  static_assert(!std::invocable<RangeRBeginT, decltype((e))>);
+  assert(std::ranges::rbegin(e) == &e.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::rbegin(ee) == &ee.x);
   assert(std::ranges::crbegin(e) == &e.x);
   assert(std::ranges::crbegin(ee) == &ee.x);
 
   RBeginFunctionWithPrivateBeginMember f{};
   const RBeginFunctionWithPrivateBeginMember ff{};
-  static_assert(!std::invocable<RangeRBeginT, decltype((f))>);
+  assert(std::ranges::rbegin(f) == &f.y); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crbegin(f) == &f.y);
   assert(std::ranges::rbegin(ff) == &ff.y);
   assert(std::ranges::crbegin(ff) == &ff.y);
diff --git a/libcxx/test/std/ranges/range.access/rend.pass.cpp b/libcxx/test/std/ranges/range.access/rend.pass.cpp
index 5ba244b6b18c..f5f59edf1939 100644
--- a/libcxx/test/std/ranges/range.access/rend.pass.cpp
+++ b/libcxx/test/std/ranges/range.access/rend.pass.cpp
@@ -196,7 +196,7 @@ static_assert(!std::is_invocable_v<RangeREndT, REndFunction &&>);
 
 static_assert( std::is_invocable_v<RangeREndT,  REndFunction const&>);
 static_assert(!std::is_invocable_v<RangeREndT,  REndFunction &&>);
-static_assert(!std::is_invocable_v<RangeREndT,  REndFunction &>);
+static_assert(std::is_invocable_v<RangeREndT, REndFunction&>); // Ill-formed before P2602R2 Poison Pills are Too Toxic
 static_assert( std::is_invocable_v<RangeCREndT, REndFunction const&>);
 static_assert( std::is_invocable_v<RangeCREndT, REndFunction &>);
 
@@ -272,7 +272,7 @@ constexpr bool testREndFunction() {
   assert(std::ranges::rend(a) == &a.x);
   assert(std::ranges::crend(a) == &a.x);
   REndFunction aa{};
-  static_assert(!std::is_invocable_v<RangeREndT, decltype((aa))>);
+  assert(std::ranges::rend(aa) == &aa.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crend(aa) == &aa.x);
 
   REndFunctionByValue b;
@@ -287,28 +287,28 @@ constexpr bool testREndFunction() {
   assert(std::ranges::rend(d) == &d.x);
   assert(std::ranges::crend(d) == &d.x);
   REndFunctionReturnsEmptyPtr dd{};
-  static_assert(!std::is_invocable_v<RangeREndT, decltype((dd))>);
+  assert(std::ranges::rend(dd) == &dd.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crend(dd) == &dd.x);
 
   const REndFunctionWithDataMember e{};
   assert(std::ranges::rend(e) == &e.x);
   assert(std::ranges::crend(e) == &e.x);
   REndFunctionWithDataMember ee{};
-  static_assert(!std::is_invocable_v<RangeREndT, decltype((ee))>);
+  assert(std::ranges::rend(ee) == &ee.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crend(ee) == &ee.x);
 
   const REndFunctionWithPrivateEndMember f{};
   assert(std::ranges::rend(f) == &f.y);
   assert(std::ranges::crend(f) == &f.y);
   REndFunctionWithPrivateEndMember ff{};
-  static_assert(!std::is_invocable_v<RangeREndT, decltype((ff))>);
+  assert(std::ranges::rend(ff) == &ff.y); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crend(ff) == &ff.y);
 
   const RBeginMemberEndFunction g{};
   assert(std::ranges::rend(g) == &g.x);
   assert(std::ranges::crend(g) == &g.x);
   RBeginMemberEndFunction gg{};
-  static_assert(!std::is_invocable_v<RangeREndT, decltype((gg))>);
+  assert(std::ranges::rend(gg) == &gg.x); // Ill-formed before P2602R2 Poison Pills are Too Toxic
   assert(std::ranges::crend(gg) == &gg.x);
 
   return true;
diff --git a/libcxx/test/std/ranges/range.access/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp
index fd7d0a8b9975..ee44aa815ba9 100644
--- a/libcxx/test/std/ranges/range.access/size.pass.cpp
+++ b/libcxx/test/std/ranges/range.access/size.pass.cpp
@@ -219,7 +219,8 @@ inline constexpr bool std::ranges::disable_sized_range<const ImproperlyDisabledF
 
 static_assert( std::is_invocable_v<RangeSizeT, ImproperlyDisabledMember&>);
 static_assert( std::is_invocable_v<RangeSizeT, const ImproperlyDisabledMember&>);
-static_assert(!std::is_invocable_v<RangeSizeT, ImproperlyDisabledFunction&>);
+static_assert(std::is_invocable_v<RangeSizeT,
+                                  ImproperlyDisabledFunction&>); // Ill-formed before P2602R2 Poison Pills are Too Toxic
 static_assert( std::is_invocable_v<RangeSizeT, const ImproperlyDisabledFunction&>);
 
 // No begin end.
diff --git a/libcxx/test/std/ranges/robust_against_poison_pills.compile.pass.cpp b/libcxx/test/std/ranges/robust_against_poison_pills.compile.pass.cpp
new file mode 100644
index 000000000000..1b3da814d080
--- /dev/null
+++ b/libcxx/test/std/ranges/robust_against_poison_pills.compile.pass.cpp
@@ -0,0 +1,102 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <compare>, <iterator>, <ranges>
+
+// ADL should be performed. Ordinary unqualified lookup should not be performed.
+
+namespace ns {
+struct StructWithGlobalFunctions {};
+} // namespace ns
+
+struct ConvertibleToCmpType;
+ConvertibleToCmpType strong_order(const ns::StructWithGlobalFunctions&, const ns::StructWithGlobalFunctions&);
+ConvertibleToCmpType weak_order(const ns::StructWithGlobalFunctions&, const ns::StructWithGlobalFunctions&);
+ConvertibleToCmpType partial_order(const ns::StructWithGlobalFunctions&, const ns::StructWithGlobalFunctions&);
+
+int&& iter_move(const ns::StructWithGlobalFunctions&);
+void iter_swap(const ns::StructWithGlobalFunctions&, const ns::StructWithGlobalFunctions&);
+
+int* begin(const ns::StructWithGlobalFunctions&);
+int* end(const ns::StructWithGlobalFunctions&);
+int* rbegin(const ns::StructWithGlobalFunctions&);
+int* rend(const ns::StructWithGlobalFunctions&);
+unsigned int size(const ns::StructWithGlobalFunctions&);
+
+#include <compare>
+#include <ranges>
+#include <type_traits>
+
+struct ConvertibleToCmpType {
+  operator std::strong_ordering() const;
+  operator std::weak_ordering() const;
+  operator std::partial_ordering() const;
+};
+
+struct StructWithHiddenFriends {
+  friend ConvertibleToCmpType strong_order(const StructWithHiddenFriends&, const StructWithHiddenFriends&);
+  friend ConvertibleToCmpType weak_order(const StructWithHiddenFriends&, const StructWithHiddenFriends&);
+  friend ConvertibleToCmpType partial_order(const StructWithHiddenFriends&, const StructWithHiddenFriends&);
+
+  friend int&& iter_move(const StructWithHiddenFriends&);
+  friend void iter_swap(const StructWithHiddenFriends&, const StructWithHiddenFriends&);
+
+  friend int* begin(const StructWithHiddenFriends&);
+  friend int* end(const StructWithHiddenFriends&);
+  friend int* rbegin(const StructWithHiddenFriends&);
+  friend int* rend(const StructWithHiddenFriends&);
+  friend unsigned int size(const StructWithHiddenFriends&);
+};
+
+// [cmp.alg] ADL should be performed.
+static_assert(std::is_invocable_v<decltype(std::strong_order), StructWithHiddenFriends&, StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::weak_order), StructWithHiddenFriends&, StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::partial_order), StructWithHiddenFriends&, StructWithHiddenFriends&>);
+
+// [cmp.alg] Ordinary unqualified lookup should not be performed.
+static_assert(
+    !std::is_invocable_v<decltype(std::strong_order), ns::StructWithGlobalFunctions&, ns::StructWithGlobalFunctions&>);
+static_assert(
+    !std::is_invocable_v<decltype(std::weak_order), ns::StructWithGlobalFunctions&, ns::StructWithGlobalFunctions&>);
+static_assert(
+    !std::is_invocable_v<decltype(std::partial_order), ns::StructWithGlobalFunctions&, ns::StructWithGlobalFunctions&>);
+
+// [iterator.cust] ADL should be performed.
+static_assert(std::is_invocable_v<decltype(std::ranges::iter_move), StructWithHiddenFriends&>);
+static_assert(
+    std::is_invocable_v<decltype(std::ranges::iter_swap), StructWithHiddenFriends&, StructWithHiddenFriends&>);
+
+// [iterator.cust] Ordinary unqualified lookup should not be performed.
+static_assert(!std::is_invocable_v<decltype(std::ranges::iter_move), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::iter_swap),
+                                   ns::StructWithGlobalFunctions&,
+                                   ns::StructWithGlobalFunctions&>);
+
+// [range.access] ADL should be performed.
+static_assert(std::is_invocable_v<decltype(std::ranges::begin), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::cbegin), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::end), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::cend), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::rbegin), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::crbegin), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::rend), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::crend), StructWithHiddenFriends&>);
+static_assert(std::is_invocable_v<decltype(std::ranges::size), StructWithHiddenFriends&>);
+
+// [range.access] Ordinary unqualified lookup should not be performed.
+static_assert(!std::is_invocable_v<decltype(std::ranges::begin), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::cbegin), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::end), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::cend), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::rbegin), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::crbegin), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::rend), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::crend), ns::StructWithGlobalFunctions&>);
+static_assert(!std::is_invocable_v<decltype(std::ranges::size), ns::StructWithGlobalFunctions&>);
diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
index b183cb423111..a88da986bb38 100644
--- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp
+++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
@@ -23,7 +23,6 @@
 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormatVariadic.h"
 
 #include <regex>
 
@@ -161,7 +160,17 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
     }
   }
 
-  // Second, also lastly, try `expr` as a source expression to evaluate.
+  // Second, try `expr` as a persistent variable.
+  if (expr.starts_with("$"))
+    if (auto *state = target.GetPersistentExpressionStateForLanguage(language))
+      if (auto var_sp = state->GetVariable(expr))
+        if (auto valobj_sp = var_sp->GetValueObject()) {
+          valobj_sp->Dump(result.GetOutputStream(), dump_options);
+          result.SetStatus(eReturnStatusSuccessFinishResult);
+          return;
+        }
+
+  // Third, and lastly, try `expr` as a source expression to evaluate.
   {
     auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope();
     ValueObjectSP valobj_sp;
diff --git a/lldb/source/Host/common/Alarm.cpp b/lldb/source/Host/common/Alarm.cpp
index 80c544773d76..245cdc7ae5c2 100644
--- a/lldb/source/Host/common/Alarm.cpp
+++ b/lldb/source/Host/common/Alarm.cpp
@@ -34,7 +34,7 @@ Alarm::Handle Alarm::Create(std::function<void()> callback) {
   Handle handle = INVALID_HANDLE;
 
   {
-    std::lock_guard alarm_guard(m_alarm_mutex);
+    std::lock_guard<std::mutex> alarm_guard(m_alarm_mutex);
 
     // Create a new unique entry and remember its handle.
     m_entries.emplace_back(callback, expiration);
@@ -59,7 +59,7 @@ bool Alarm::Restart(Handle handle) {
   const TimePoint expiration = GetNextExpiration();
 
   {
-    std::lock_guard alarm_guard(m_alarm_mutex);
+    std::lock_guard<std::mutex> alarm_guard(m_alarm_mutex);
 
     // Find the entry corresponding to the given handle.
     const auto it =
@@ -86,7 +86,7 @@ bool Alarm::Cancel(Handle handle) {
     return false;
 
   {
-    std::lock_guard alarm_guard(m_alarm_mutex);
+    std::lock_guard<std::mutex> alarm_guard(m_alarm_mutex);
 
     const auto it =
         std::find_if(m_entries.begin(), m_entries.end(),
@@ -126,7 +126,7 @@ void Alarm::StartAlarmThread() {
 void Alarm::StopAlarmThread() {
   if (m_alarm_thread.IsJoinable()) {
     {
-      std::lock_guard alarm_guard(m_alarm_mutex);
+      std::lock_guard<std::mutex> alarm_guard(m_alarm_mutex);
       m_exit = true;
     }
     m_alarm_cv.notify_one();
@@ -154,7 +154,7 @@ lldb::thread_result_t Alarm::AlarmThread() {
     //
     // Below we only deal with the timeout expiring and fall through for dealing
     // with the rest.
-    std::unique_lock alarm_lock(m_alarm_mutex);
+    std::unique_lock<std::mutex> alarm_lock(m_alarm_mutex);
     if (next_alarm) {
       if (!m_alarm_cv.wait_until(alarm_lock, *next_alarm, predicate)) {
         // The timeout for the next alarm expired.
diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
index 040632096c70..c650b1e3533e 100644
--- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
+++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
@@ -146,3 +146,15 @@ class TestCase(TestBase):
             self, "// break here", lldb.SBFileSpec("main.c")
         )
         self.expect("dwim-print (void)15", matching=False, patterns=["(?i)error"])
+
+    def test_preserves_persistent_variables(self):
+        """Test dwim-print does not delete persistent variables."""
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.c")
+        )
+        self.expect("dwim-print int $i = 15")
+        # Run the same expression twice and verify success. This ensures the
+        # first command does not delete the persistent variable.
+        for _ in range(2):
+            self.expect("dwim-print $i", startstr="(int) 15")
diff --git a/lldb/unittests/Host/AlarmTest.cpp b/lldb/unittests/Host/AlarmTest.cpp
index e5895574376e..9f6ad189dee9 100644
--- a/lldb/unittests/Host/AlarmTest.cpp
+++ b/lldb/unittests/Host/AlarmTest.cpp
@@ -46,7 +46,7 @@ TEST(AlarmTest, Create) {
                                     ALARM_TIMEOUT);
 
     alarm.Create([&callbacks_actual, &m, i]() {
-      std::lock_guard guard(m);
+      std::lock_guard<std::mutex> guard(m);
       callbacks_actual[i] = std::chrono::system_clock::now();
     });
 
@@ -75,7 +75,7 @@ TEST(AlarmTest, Exit) {
       callbacks.emplace_back(false);
 
       handles.push_back(alarm.Create([&callbacks, &m, i]() {
-        std::lock_guard guard(m);
+        std::lock_guard<std::mutex> guard(m);
         callbacks[i] = true;
       }));
     }
@@ -101,7 +101,7 @@ TEST(AlarmTest, Cancel) {
     callbacks.emplace_back(false);
 
     handles.push_back(alarm.Create([&callbacks, &m, i]() {
-      std::lock_guard guard(m);
+      std::lock_guard<std::mutex> guard(m);
       callbacks[i] = true;
     }));
   }
@@ -137,7 +137,7 @@ TEST(AlarmTest, Restart) {
                                     ALARM_TIMEOUT);
 
     handles.push_back(alarm.Create([&callbacks_actual, &m, i]() {
-      std::lock_guard guard(m);
+      std::lock_guard<std::mutex> guard(m);
       callbacks_actual[i] = std::chrono::system_clock::now();
     }));
 
diff --git a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
index f237dd63ab1c..4379ffac9d74 100644
--- a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
+++ b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp
@@ -102,7 +102,7 @@ protected:
     EXPECT_EQ(line, entry.line);
     EXPECT_EQ(address, entry.range.GetBaseAddress());
 
-    EXPECT_TRUE(FileSpecMatchesAsBaseOrFull(spec, entry.file));
+    EXPECT_TRUE(FileSpecMatchesAsBaseOrFull(spec, entry.GetFile()));
   }
 
   bool ContainsCompileUnit(const SymbolContextList &sc_list,
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 5610880da710..d84d9d7cca68 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1637,8 +1637,14 @@ function(add_unittest test_suite test_name)
   # The runtime benefits of LTO don't outweight the compile time costs for tests.
   if(LLVM_ENABLE_LTO)
     if((UNIX OR MINGW) AND LINKER_IS_LLD)
-      set_property(TARGET ${test_name} APPEND_STRING PROPERTY
-                    LINK_FLAGS " -Wl,--lto-O0")
+      if(LLVM_ENABLE_FATLTO)
+        # When using FatLTO, just use relocatable linking.
+        set_property(TARGET ${test_name} APPEND_STRING PROPERTY
+                      LINK_FLAGS " -Wl,--no-fat-lto-objects")
+      else()
+        set_property(TARGET ${test_name} APPEND_STRING PROPERTY
+                      LINK_FLAGS " -Wl,--lto-O0")
+      endif()
     elseif(LINKER_IS_LLD_LINK)
       set_property(TARGET ${test_name} APPEND_STRING PROPERTY
                     LINK_FLAGS " /opt:lldlto=0")
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 745a8354f118..92fa9839db65 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -32,6 +32,8 @@ endif()
 set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
 string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
 
+option(LLVM_ENABLE_FATLTO "Build LLVM with -ffat-lto-objects." OFF)
+
 # Ninja Job Pool support
 # The following only works with the Ninja generator in CMake >= 3.0.
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
@@ -1280,6 +1282,13 @@ elseif(LLVM_ENABLE_LTO)
   endif()
 endif()
 
+if(LLVM_ENABLE_FATLTO AND (FUCHSIA OR UNIX))
+  append("-ffat-lto-objects" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+  if(NOT LINKER_IS_LLD_LINK)
+    append("-ffat-lto-objects" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS)
+  endif()
+endif()
+
 # Set an AIX default for LLVM_EXPORT_SYMBOLS_FOR_PLUGINS based on whether we are
 # doing dynamic linking (see below).
 set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS_AIX_default OFF)
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 00536c71c3e2..5c72f06f96ed 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -21,7 +21,8 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">,
     Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>;
 
 def int_dx_any  : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
-
+def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; 
 def int_dx_dot : 
     Intrinsic<[LLVMVectorElementType<0>], 
     [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 6822cfdb4957..8817a2585646 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -626,6 +626,52 @@ public:
   void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM,
                                                     OptimizationLevel Level);
 
+  static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
+    if (!Name.consume_front(PassName))
+      return false;
+    // normal pass name w/o parameters == default parameters
+    if (Name.empty())
+      return true;
+    return Name.starts_with("<") && Name.ends_with(">");
+  }
+
+  /// This performs customized parsing of pass name with parameters.
+  ///
+  /// We do not need parametrization of passes in textual pipeline very often,
+  /// yet on a rare occasion ability to specify parameters right there can be
+  /// useful.
+  ///
+  /// \p Name - parameterized specification of a pass from a textual pipeline
+  /// is a string in a form of :
+  ///      PassName '<' parameter-list '>'
+  ///
+  /// Parameter list is being parsed by the parser callable argument, \p Parser,
+  /// It takes a string-ref of parameters and returns either StringError or a
+  /// parameter list in a form of a custom parameters type, all wrapped into
+  /// Expected<> template class.
+  ///
+  template <typename ParametersParseCallableT>
+  static auto parsePassParameters(ParametersParseCallableT &&Parser,
+                                  StringRef Name, StringRef PassName)
+      -> decltype(Parser(StringRef{})) {
+    using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
+
+    StringRef Params = Name;
+    if (!Params.consume_front(PassName)) {
+      llvm_unreachable(
+          "unable to strip pass name from parametrized pass specification");
+    }
+    if (!Params.empty() &&
+        (!Params.consume_front("<") || !Params.consume_back(">"))) {
+      llvm_unreachable("invalid format for parametrized pass name");
+    }
+
+    Expected<ParametersT> Result = Parser(Params);
+    assert((Result || Result.template errorIsA<StringError>()) &&
+           "Pass parameter parser can only return StringErrors.");
+    return Result;
+  }
+
 private:
   // O1 pass pipeline
   FunctionPassManager
diff --git a/llvm/include/llvm/TextAPI/Record.h b/llvm/include/llvm/TextAPI/Record.h
index 98639b064eaa..ef152ce43387 100644
--- a/llvm/include/llvm/TextAPI/Record.h
+++ b/llvm/include/llvm/TextAPI/Record.h
@@ -51,7 +51,8 @@ class Record {
 public:
   Record() = default;
   Record(StringRef Name, RecordLinkage Linkage, SymbolFlags Flags)
-      : Name(Name), Linkage(Linkage), Flags(mergeFlags(Flags, Linkage)) {}
+      : Name(Name), Linkage(Linkage), Flags(mergeFlags(Flags, Linkage)),
+        Verified(false) {}
 
   bool isWeakDefined() const {
     return (Flags & SymbolFlags::WeakDefined) == SymbolFlags::WeakDefined;
@@ -79,6 +80,9 @@ public:
   bool isExported() const { return Linkage >= RecordLinkage::Rexported; }
   bool isRexported() const { return Linkage == RecordLinkage::Rexported; }
 
+  bool isVerified() const { return Verified; }
+  void setVerify(bool V = true) { Verified = V; }
+
   StringRef getName() const { return Name; }
   SymbolFlags getFlags() const { return Flags; }
 
@@ -89,6 +93,7 @@ protected:
   StringRef Name;
   RecordLinkage Linkage;
   SymbolFlags Flags;
+  bool Verified;
 
   friend class RecordsSlice;
 };
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 4d1eb10d2d41..0204730f750a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -503,15 +503,6 @@ static std::optional<int> parseDevirtPassName(StringRef Name) {
   return Count;
 }
 
-static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
-  if (!Name.consume_front(PassName))
-    return false;
-  // normal pass name w/o parameters == default parameters
-  if (Name.empty())
-    return true;
-  return Name.starts_with("<") && Name.ends_with(">");
-}
-
 static std::optional<OptimizationLevel> parseOptLevel(StringRef S) {
   return StringSwitch<std::optional<OptimizationLevel>>(S)
       .Case("O0", OptimizationLevel::O0)
@@ -525,42 +516,6 @@ static std::optional<OptimizationLevel> parseOptLevel(StringRef S) {
 
 namespace {
 
-/// This performs customized parsing of pass name with parameters.
-///
-/// We do not need parametrization of passes in textual pipeline very often,
-/// yet on a rare occasion ability to specify parameters right there can be
-/// useful.
-///
-/// \p Name - parameterized specification of a pass from a textual pipeline
-/// is a string in a form of :
-///      PassName '<' parameter-list '>'
-///
-/// Parameter list is being parsed by the parser callable argument, \p Parser,
-/// It takes a string-ref of parameters and returns either StringError or a
-/// parameter list in a form of a custom parameters type, all wrapped into
-/// Expected<> template class.
-///
-template <typename ParametersParseCallableT>
-auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
-                         StringRef PassName) -> decltype(Parser(StringRef{})) {
-  using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
-
-  StringRef Params = Name;
-  if (!Params.consume_front(PassName)) {
-    assert(false &&
-           "unable to strip pass name from parametrized pass specification");
-  }
-  if (!Params.empty() &&
-      (!Params.consume_front("<") || !Params.consume_back(">"))) {
-    assert(false && "invalid format for parametrized pass name");
-  }
-
-  Expected<ParametersT> Result = Parser(Params);
-  assert((Result || Result.template errorIsA<StringError>()) &&
-         "Pass parameter parser can only return StringErrors.");
-  return Result;
-}
-
 /// Parser of parameters for HardwareLoops  pass.
 Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
   HardwareLoopOptions HardwareLoopOpts;
@@ -1196,7 +1151,7 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
   if (Name == NAME)                                                            \
     return true;
 #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)      \
-  if (checkParametrizedPassName(Name, NAME))                                   \
+  if (PassBuilder::checkParametrizedPassName(Name, NAME))                      \
     return true;
 #define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
   if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
@@ -1225,7 +1180,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
   if (Name == NAME)                                                            \
     return true;
 #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)       \
-  if (checkParametrizedPassName(Name, NAME))                                   \
+  if (PassBuilder::checkParametrizedPassName(Name, NAME))                      \
     return true;
 #define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
   if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
@@ -1252,7 +1207,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
   if (Name == NAME)                                                            \
     return true;
 #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)    \
-  if (checkParametrizedPassName(Name, NAME))                                   \
+  if (PassBuilder::checkParametrizedPassName(Name, NAME))                      \
     return true;
 #define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
   if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
@@ -1293,7 +1248,7 @@ static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks,
   if (parseRepeatPassName(Name))
     return true;
 
-  if (checkParametrizedPassName(Name, "lnicm")) {
+  if (PassBuilder::checkParametrizedPassName(Name, "lnicm")) {
     UseMemorySSA = true;
     return true;
   }
@@ -1315,7 +1270,7 @@ static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks,
   if (parseRepeatPassName(Name))
     return true;
 
-  if (checkParametrizedPassName(Name, "licm")) {
+  if (PassBuilder::checkParametrizedPassName(Name, "licm")) {
     UseMemorySSA = true;
     return true;
   }
@@ -1324,7 +1279,7 @@ static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks,
   if (Name == NAME)                                                            \
     return true;
 #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)        \
-  if (checkParametrizedPassName(Name, NAME))                                   \
+  if (PassBuilder::checkParametrizedPassName(Name, NAME))                      \
     return true;
 #define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
   if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index b0e587d2e7ea..216fa5b10c8f 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -274,8 +274,18 @@ def Round : DXILOpMapping<26, unary, int_round,
                          "Returns the input rounded to the nearest integer"
                          "within a floating-point type.",
                          [llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def FMax : DXILOpMapping<35, binary, int_maxnum,
+                         "Float maximum. FMax(a,b) = a > b ? a : b">;
+def FMin : DXILOpMapping<36, binary, int_minnum,
+                         "Float minimum. FMin(a,b) = a < b ? a : b">;
+def SMax : DXILOpMapping<37, binary, int_smax,
+                         "Signed integer maximum. SMax(a,b) = a > b ? a : b">;
+def SMin : DXILOpMapping<38, binary, int_smin,
+                         "Signed integer minimum. SMin(a,b) = a < b ? a : b">;
 def UMax : DXILOpMapping<39, binary, int_umax,
                          "Unsigned integer maximum. UMax(a,b) = a > b ? a : b">;
+def UMin : DXILOpMapping<40, binary, int_umin,
+                         "Unsigned integer minimum. UMin(a,b) = a < b ? a : b">;
 def FMad : DXILOpMapping<46, tertiary, int_fmuladd,
                          "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m * a + b.">;
 def IMad : DXILOpMapping<48, tertiary, int_dx_imad,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 0461f0490017..bc38c10a1fce 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -35,6 +35,8 @@ static bool isIntrinsicExpansion(Function &F) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::exp:
   case Intrinsic::dx_any:
+  case Intrinsic::dx_clamp:
+  case Intrinsic::dx_uclamp:
   case Intrinsic::dx_lerp:
   case Intrinsic::dx_rcp:
     return true;
@@ -132,12 +134,59 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
   return true;
 }
 
+static Intrinsic::ID getMaxForClamp(Type *ElemTy,
+                                    Intrinsic::ID ClampIntrinsic) {
+  if (ClampIntrinsic == Intrinsic::dx_uclamp)
+    return Intrinsic::umax;
+  assert(ClampIntrinsic == Intrinsic::dx_clamp);
+  if (ElemTy->isVectorTy())
+    ElemTy = ElemTy->getScalarType();
+  if (ElemTy->isIntegerTy())
+    return Intrinsic::smax;
+  assert(ElemTy->isFloatingPointTy());
+  return Intrinsic::maxnum;
+}
+
+static Intrinsic::ID getMinForClamp(Type *ElemTy,
+                                    Intrinsic::ID ClampIntrinsic) {
+  if (ClampIntrinsic == Intrinsic::dx_uclamp)
+    return Intrinsic::umin;
+  assert(ClampIntrinsic == Intrinsic::dx_clamp);
+  if (ElemTy->isVectorTy())
+    ElemTy = ElemTy->getScalarType();
+  if (ElemTy->isIntegerTy())
+    return Intrinsic::smin;
+  assert(ElemTy->isFloatingPointTy());
+  return Intrinsic::minnum;
+}
+
+static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) {
+  Value *X = Orig->getOperand(0);
+  Value *Min = Orig->getOperand(1);
+  Value *Max = Orig->getOperand(2);
+  Type *Ty = X->getType();
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  auto *MaxCall = Builder.CreateIntrinsic(
+      Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max");
+  auto *MinCall =
+      Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic),
+                              {MaxCall, Max}, nullptr, "dx.min");
+
+  Orig->replaceAllUsesWith(MinCall);
+  Orig->eraseFromParent();
+  return true;
+}
+
 static bool expandIntrinsic(Function &F, CallInst *Orig) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::exp:
     return expandExpIntrinsic(Orig);
   case Intrinsic::dx_any:
     return expandAnyIntrinsic(Orig);
+  case Intrinsic::dx_uclamp:
+  case Intrinsic::dx_clamp:
+    return expandClampIntrinsic(Orig, F.getIntrinsicID());
   case Intrinsic::dx_lerp:
     return expandLerpIntrinsic(Orig);
   case Intrinsic::dx_rcp:
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index adfcea736158..bdf299ae8ac1 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -1255,28 +1255,6 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
     emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI);
   }
 
-#if 0
-  // We haven't support -mabicalls -mno-shared yet.
-  if (-mno-shared) {
-    MCSymbol *GPSym = MCA.getContext().getOrCreateSymbol("__gnu_local_gp");
-    const MipsMCExpr *HiExpr = MipsMCExpr::create(
-        MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(GPSym, MCA.getContext()),
-        MCA.getContext());
-    const MipsMCExpr *LoExpr = MipsMCExpr::create(
-        MipsMCExpr::MEK_LO, MCSymbolRefExpr::create(GPSym, MCA.getContext()),
-        MCA.getContext());
-
-    // lui $gp, %hi(__gnu_local_gp)
-    emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
-
-    // addiu  $gp, $gp, %lo(__gnu_local_gp)
-    emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
-            &STI);
-
-    return;
-  }
-#endif
-
   const MipsMCExpr *HiExpr = MipsMCExpr::createGpOff(
       MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(&Sym, MCA.getContext()),
       MCA.getContext());
diff --git a/llvm/lib/TextAPI/TextStub.cpp b/llvm/lib/TextAPI/TextStub.cpp
index 24a52607a981..0f742523f820 100644
--- a/llvm/lib/TextAPI/TextStub.cpp
+++ b/llvm/lib/TextAPI/TextStub.cpp
@@ -1079,16 +1079,16 @@ Expected<FileType> TextAPIReader::canRead(MemoryBufferRef InputBuffer) {
   if (!TAPIFile.ends_with("..."))
     return createStringError(std::errc::not_supported, "unsupported file type");
 
-  if (TAPIFile.starts_with("--- !tapi-tbd\n"))
+  if (TAPIFile.starts_with("--- !tapi-tbd"))
     return FileType::TBD_V4;
 
-  if (TAPIFile.starts_with("--- !tapi-tbd-v3\n"))
+  if (TAPIFile.starts_with("--- !tapi-tbd-v3"))
     return FileType::TBD_V3;
 
-  if (TAPIFile.starts_with("--- !tapi-tbd-v2\n"))
+  if (TAPIFile.starts_with("--- !tapi-tbd-v2"))
     return FileType::TBD_V2;
 
-  if (TAPIFile.starts_with("--- !tapi-tbd-v1\n") ||
+  if (TAPIFile.starts_with("--- !tapi-tbd-v1") ||
       TAPIFile.starts_with("---\narchs:"))
     return FileType::TBD_V1;
 
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e66a1a2ccb31..1caed93b1b66 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -96,6 +96,11 @@ static cl::opt<bool>
                     cl::desc("View the CFG before DFA Jump Threading"),
                     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> EarlyExitHeuristic(
+    "dfa-early-exit-heuristic",
+    cl::desc("Exit early if an unpredictable value come from the same loop"),
+    cl::Hidden, cl::init(true));
+
 static cl::opt<unsigned> MaxPathLength(
     "dfa-max-path-length",
     cl::desc("Max number of blocks searched to find a threading path"),
@@ -405,7 +410,7 @@ private:
   ///
   /// Also, collect select instructions to unfold.
   bool isCandidate(const SwitchInst *SI) {
-    std::deque<Value *> Q;
+    std::deque<std::pair<Value *, BasicBlock *>> Q;
     SmallSet<Value *, 16> SeenValues;
     SelectInsts.clear();
 
@@ -415,25 +420,28 @@ private:
       return false;
 
     // The switch must be in a loop.
-    if (!LI->getLoopFor(SI->getParent()))
+    const Loop *L = LI->getLoopFor(SI->getParent());
+    if (!L)
       return false;
 
-    addToQueue(SICond, Q, SeenValues);
+    addToQueue(SICond, nullptr, Q, SeenValues);
 
     while (!Q.empty()) {
-      Value *Current = Q.front();
+      Value *Current = Q.front().first;
+      BasicBlock *CurrentIncomingBB = Q.front().second;
       Q.pop_front();
 
       if (auto *Phi = dyn_cast<PHINode>(Current)) {
-        for (Value *Incoming : Phi->incoming_values()) {
-          addToQueue(Incoming, Q, SeenValues);
+        for (BasicBlock *IncomingBB : Phi->blocks()) {
+          Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
+          addToQueue(Incoming, IncomingBB, Q, SeenValues);
         }
         LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
       } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
         if (!isValidSelectInst(SelI))
           return false;
-        addToQueue(SelI->getTrueValue(), Q, SeenValues);
-        addToQueue(SelI->getFalseValue(), Q, SeenValues);
+        addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
+        addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
         LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
         if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
           SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
@@ -446,6 +454,18 @@ private:
         // initial switch values that can be ignored (they will hit the
         // unthreaded switch) but this assumption will get checked later after
         // paths have been enumerated (in function getStateDefMap).
+
+        // If the unpredictable value comes from the same inner loop it is
+        // likely that it will also be on the enumerated paths, causing us to
+        // exit after we have enumerated all the paths. This heuristic save
+        // compile time because a search for all the paths can become expensive.
+        if (EarlyExitHeuristic &&
+            L->contains(LI->getLoopFor(CurrentIncomingBB))) {
+          LLVM_DEBUG(dbgs()
+                     << "\tExiting early due to unpredictability heuristic.\n");
+          return false;
+        }
+
         continue;
       }
     }
@@ -453,11 +473,12 @@ private:
     return true;
   }
 
-  void addToQueue(Value *Val, std::deque<Value *> &Q,
+  void addToQueue(Value *Val, BasicBlock *BB,
+                  std::deque<std::pair<Value *, BasicBlock *>> &Q,
                   SmallSet<Value *, 16> &SeenValues) {
     if (SeenValues.contains(Val))
       return;
-    Q.push_back(Val);
+    Q.push_back({Val, BB});
     SeenValues.insert(Val);
   }
 
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 67c011b747ac..e991296bd2fb 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -19,6 +19,7 @@
 
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -990,12 +991,15 @@ public:
     bool Changed = false;
     SmallVector<CallInst *, 16> MaybeFusableInsts;
     SmallVector<Instruction *, 16> MatrixInsts;
+    SmallVector<IntrinsicInst *, 16> LifetimeEnds;
 
     // First, collect all instructions with shape information and candidates for
     // fusion (currently only matrix multiplies).
     ReversePostOrderTraversal<Function *> RPOT(&Func);
     for (auto *BB : RPOT)
       for (Instruction &I : *BB) {
+        if (match(&I, m_Intrinsic<Intrinsic::lifetime_end>()))
+          LifetimeEnds.push_back(cast<IntrinsicInst>(&I));
         if (ShapeMap.find(&I) == ShapeMap.end())
           continue;
         if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>()))
@@ -1010,7 +1014,7 @@ public:
 
     // Third, try to fuse candidates.
     for (CallInst *CI : MaybeFusableInsts)
-      LowerMatrixMultiplyFused(CI, FusedInsts);
+      LowerMatrixMultiplyFused(CI, FusedInsts, LifetimeEnds);
 
     Changed = !FusedInsts.empty();
 
@@ -1856,8 +1860,10 @@ public:
   ///
   /// Call finalizeLowering on lowered instructions.  Instructions that are
   /// completely eliminated by fusion are added to \p FusedInsts.
-  void LowerMatrixMultiplyFused(CallInst *MatMul,
-                                SmallPtrSetImpl<Instruction *> &FusedInsts) {
+  void
+  LowerMatrixMultiplyFused(CallInst *MatMul,
+                           SmallPtrSetImpl<Instruction *> &FusedInsts,
+                           SmallVector<IntrinsicInst *, 16> &LifetimeEnds) {
     if (!FuseMatrix || !DT)
       return;
 
@@ -1946,6 +1952,55 @@ public:
       for (Instruction *I : ToHoist)
         I->moveBefore(MatMul);
 
+      // Deal with lifetime.end calls that might be between Load0/Load1 and the
+      // store. To avoid introducing loads to dead objects (i.e. after the
+      // lifetime has been termined by @llvm.lifetime.end), either sink them
+      // after the store if in the same block, or remove the lifetime.end marker
+      // otherwise. This might pessimize further optimizations, by extending the
+      // lifetime of the object until the function returns, but should be
+      // conservatively correct.
+      MemoryLocation Load0Loc = MemoryLocation::get(LoadOp0);
+      MemoryLocation Load1Loc = MemoryLocation::get(LoadOp1);
+      BasicBlock *StoreParent = Store->getParent();
+      bool FusableOpsInSameBlock = LoadOp0->getParent() == StoreParent &&
+                                   LoadOp1->getParent() == StoreParent;
+      for (unsigned Idx = 0; Idx != LifetimeEnds.size();) {
+        IntrinsicInst *End = LifetimeEnds[Idx];
+        auto Inc = make_scope_exit([&Idx]() { Idx++; });
+        // If the lifetime.end is guaranteed to be before the loads or after the
+        // store, it won't interfere with fusion.
+        if (DT->dominates(End, LoadOp0) && DT->dominates(End, LoadOp1))
+          continue;
+        if (DT->dominates(Store, End))
+          continue;
+        // If all fusable ops are in the same block and the lifetime.end is in a
+        // different block, it won't interfere with fusion.
+        if (FusableOpsInSameBlock && End->getParent() != StoreParent)
+          continue;
+
+        // If the loads don't alias the lifetime.end, it won't interfere with
+        // fusion.
+        MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr);
+        if (!EndLoc.Ptr)
+          continue;
+        if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
+          continue;
+
+        // If both lifetime.end and the store are in the same block, extend the
+        // lifetime until after the store, so the new lifetime covers the loads
+        // we introduce later.
+        if (End->getParent() == StoreParent) {
+          End->moveAfter(Store);
+          continue;
+        }
+
+        // Otherwise remove the conflicting lifetime.end marker.
+        ToRemove.push_back(End);
+        std::swap(LifetimeEnds[Idx], LifetimeEnds.back());
+        LifetimeEnds.pop_back();
+        Inc.release();
+      }
+
       emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
       return;
     }
diff --git a/llvm/test/CodeGen/DirectX/clamp-vec.ll b/llvm/test/CodeGen/DirectX/clamp-vec.ll
new file mode 100644
index 000000000000..d4f33a18b715
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp-vec.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
+
+; Make sure dxil operation function calls for clamp are generated for float/int/uint vectors.
+
+; CHECK-LABEL: clamp_half3
+define noundef <3 x half> @clamp_half3(<3 x half> noundef %a, <3 x half> noundef %b, <3 x half> noundef %c) {
+entry:
+  ; CHECK: call <3 x half> @llvm.maxnum.v3f16(<3 x half>  %a, <3 x half>  %b)
+  ; CHECK: call <3 x half> @llvm.minnum.v3f16(<3 x half>  %{{.*}}, <3 x half>  %c)
+  %dx.clamp = call <3 x half> @llvm.dx.clamp.v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c)
+  ret <3 x half> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_float4
+define noundef <4 x float> @clamp_float4(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+  ; CHECK: call <4 x float> @llvm.maxnum.v4f32(<4 x float>  %a, <4 x float>  %b)
+  ; CHECK: call <4 x float> @llvm.minnum.v4f32(<4 x float>  %{{.*}}, <4 x float>  %c)
+  %dx.clamp = call <4 x float> @llvm.dx.clamp.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_double2
+define noundef <2 x double> @clamp_double2(<2 x double> noundef %a, <2 x double> noundef %b, <2 x double> noundef %c) {
+entry:
+  ; CHECK: call <2 x double> @llvm.maxnum.v2f64(<2 x double>  %a, <2 x double>  %b)
+  ; CHECK: call <2 x double> @llvm.minnum.v2f64(<2 x double>  %{{.*}}, <2 x double>  %c)
+  %dx.clamp = call <2 x double> @llvm.dx.clamp.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+  ret <2 x double> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_int4
+define noundef <4 x i32> @clamp_int4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+  ; CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ; CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c)
+  %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint16_t3
+define noundef <3 x i16> @clamp_uint16_t3(<3 x i16> noundef %a, <3 x i16> noundef %b, <3 x i16> noundef %c) {
+entry:
+  ; CHECK: call <3 x i16> @llvm.umax.v3i16(<3 x i16>  %a, <3 x i16>  %b)
+  ; CHECK: call <3 x i16> @llvm.umin.v3i16(<3 x i16>  %{{.*}}, <3 x i16>  %c)
+  %dx.clamp = call <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16> %a, <3 x i16> %b, <3 x i16> %c)
+  ret <3 x i16> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint4
+define noundef <4 x i32> @clamp_uint4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+  ; CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32>  %a, <4 x i32>  %b)
+  ; CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32>  %{{.*}}, <4 x i32>  %c)
+  %dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint64_t4
+define noundef <2 x i64> @clamp_uint64_t4(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) {
+entry:
+  ; CHECK: call <2 x i64> @llvm.umax.v2i64(<2 x i64>  %a, <2 x i64>  %b)
+  ; CHECK: call <2 x i64> @llvm.umin.v2i64(<2 x i64>  %{{.*}}, <2 x i64>  %c)
+  %dx.clamp = call <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %dx.clamp
+}
+
+declare <3 x half> @llvm.dx.clamp.v3f16(<3 x half>, <3 x half>, <3 x half>)
+declare <4 x float> @llvm.dx.clamp.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.dx.clamp.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <3 x i16> @llvm.dx.uclamp.v3i32(<3 x i16>, <3 x i32>, <3 x i16>)
+declare <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
new file mode 100644
index 000000000000..f122313b8d7d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -0,0 +1,94 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for clamp/uclamp are generated for half/float/double/i16/i32/i64.
+
+; CHECK-LABEL:test_clamp_i16
+define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.dx.clamp.i16(i16 %a, i16 %b, i16 %c)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_clamp_i32
+define noundef i32 @test_clamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.dx.clamp.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_clamp_i64
+define noundef i64 @test_clamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %c)
+  %0 = call i64 @llvm.dx.clamp.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %0
+}
+
+; CHECK-LABEL:test_clamp_half
+define noundef half @test_clamp_half(half noundef %a, half noundef %b, half noundef %c) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+  %0 = call half @llvm.dx.clamp.f16(half %a, half %b, half %c)
+  ret half %0
+}
+
+; CHECK-LABEL:test_clamp_float
+define noundef float @test_clamp_float(float noundef %a, float noundef %b, float noundef %c) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+  %0 = call float @llvm.dx.clamp.f32(float %a, float %b, float %c)
+  ret float %0
+}
+
+; CHECK-LABEL:test_clamp_double
+define noundef double @test_clamp_double(double noundef %a, double noundef %b, double noundef %c) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+  %0 = call double @llvm.dx.clamp.f64(double %a, double %b, double %c)
+  ret double %0
+}
+
+; CHECK-LABEL:test_uclamp_i16
+define noundef i16 @test_uclamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.dx.uclamp.i16(i16 %a, i16 %b, i16 %c)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_uclamp_i32
+define noundef i32 @test_uclamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.dx.uclamp.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_uclamp_i64
+define noundef i64 @test_uclamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %c)
+  %0 = call i64 @llvm.dx.uclamp.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %0
+}
+
+declare half @llvm.dx.clamp.f16(half, half, half)
+declare float @llvm.dx.clamp.f32(float, float, float)
+declare double @llvm.dx.clamp.f64(double, double, double)
+declare i16 @llvm.dx.clamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.clamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.clamp.i64(i64, i64, i64)
+declare i16 @llvm.dx.uclamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.uclamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.uclamp.i64(i64, i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll
new file mode 100644
index 000000000000..aff722c29309
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmax are generated for half/float/double.
+
+; CHECK-LABEL:test_fmax_half
+define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+  %0 = call half @llvm.maxnum.f16(half %a, half %b)
+  ret half %0
+}
+
+; CHECK-LABEL:test_fmax_float
+define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+  %0 = call float @llvm.maxnum.f32(float %a, float %b)
+  ret float %0
+}
+
+; CHECK-LABEL:test_fmax_double
+define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+  %0 = call double @llvm.maxnum.f64(double %a, double %b)
+  ret double %0
+}
+
+declare half @llvm.maxnum.f16(half, half)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll
new file mode 100644
index 000000000000..2f7c209f0278
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmin are generated for half/float/double.
+
+; CHECK-LABEL:test_fmin_half
+define noundef half @test_fmin_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+  %0 = call half @llvm.minnum.f16(half %a, half %b)
+  ret half %0
+}
+
+; CHECK-LABEL:test_fmin_float
+define noundef float @test_fmin_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+  %0 = call float @llvm.minnum.f32(float %a, float %b)
+  ret float %0
+}
+
+; CHECK-LABEL:test_fmin_double
+define noundef double @test_fmin_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+  %0 = call double @llvm.minnum.f64(double %a, double %b)
+  ret double %0
+}
+
+declare half @llvm.minnum.f16(half, half)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll
new file mode 100644
index 000000000000..8b2406782c09
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smax are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smax_i16
+define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_smax_i32
+define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_smax_i64
+define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}})
+  %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i16 @llvm.smax.i16(i16, i16)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i64 @llvm.smax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll
new file mode 100644
index 000000000000..b2b40a1b6243
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smin_i16
+define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_smin_i32
+define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_smin_i64
+define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}})
+  %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll
index c7b6a8759927..be0f557fc8da 100644
--- a/llvm/test/CodeGen/DirectX/umax.ll
+++ b/llvm/test/CodeGen/DirectX/umax.ll
@@ -1,30 +1,31 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
-; Make sure dxil operation function calls for umax are generated for i32/i64.
+; Make sure dxil operation function calls for umax are generated for i16/i32/i64.
 
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
+; CHECK-LABEL:test_umax_i16
+define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b)
+  ret i16 %0
+}
 
 ; CHECK-LABEL:test_umax_i32
-; Function Attrs: noinline nounwind optnone
-define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) #0 {
+define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK:call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
   %0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
   ret i32 %0
 }
 
 ; CHECK-LABEL:test_umax_i64
-define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) #0 {
+define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK:call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
   %0 = call i64 @llvm.umax.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.umax.i32(i32, i32) #1
-declare i64 @llvm.umax.i64(i64, i64) #1
-
-attributes #0 = { noinline nounwind }
-attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+declare i16 @llvm.umax.i16(i16, i16)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i64 @llvm.umax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll
new file mode 100644
index 000000000000..5051c7117448
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/umin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for umin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_umin_i16
+define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_umin_i32
+define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_umin_i64
+define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}})
+  %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i16 @llvm.umin.i16(i16, i16)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.umin.i64(i64, i64)
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
index df725b9a7fa4..696bd55d2dfd 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s
 
 ; These tests check if selects are unfolded properly for jump threading
 ; opportunities. There are three different patterns to consider:
diff --git a/llvm/test/Transforms/DFAJumpThreading/unpredictable-heuristic.ll b/llvm/test/Transforms/DFAJumpThreading/unpredictable-heuristic.ll
new file mode 100644
index 000000000000..9743f0acc816
--- /dev/null
+++ b/llvm/test/Transforms/DFAJumpThreading/unpredictable-heuristic.ll
@@ -0,0 +1,124 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=dfa-jump-threading %s -debug-only=dfa-jump-threading 2>&1 | FileCheck %s
+
+; CHECK-COUNT-3: Exiting early due to unpredictability heuristic.
+
+@.str.1 = private unnamed_addr constant [3 x i8] c"10\00", align 1
+@.str.2 = private unnamed_addr constant [3 x i8] c"30\00", align 1
+@.str.3 = private unnamed_addr constant [3 x i8] c"20\00", align 1
+@.str.4 = private unnamed_addr constant [3 x i8] c"40\00", align 1
+
+define void @test1(i32 noundef %num, i32 noundef %num2) {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %sw.epilog
+  %num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
+  switch i32 %num.addr.0, label %sw.default [
+    i32 10, label %sw.bb
+    i32 30, label %sw.bb1
+    i32 20, label %sw.bb2
+    i32 40, label %sw.bb3
+  ]
+
+sw.bb:                                            ; preds = %while.body
+  %call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
+  br label %sw.epilog
+
+sw.bb1:                                           ; preds = %while.body
+  %call.i4 = tail call i32 @bar(ptr noundef nonnull @.str.2)
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %while.body
+  %call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.3)
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %while.body
+  %call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.4)
+  %call = tail call noundef i32 @foo()
+  %add = add nsw i32 %call, %num2
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %while.body
+  ret void
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+  %num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
+  br label %while.body
+}
+
+
+define void @test2(i32 noundef %num, i32 noundef %num2) {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %sw.epilog
+  %num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
+  switch i32 %num.addr.0, label %sw.default [
+    i32 10, label %sw.epilog
+    i32 30, label %sw.bb1
+    i32 20, label %sw.bb2
+    i32 40, label %sw.bb3
+  ]
+
+sw.bb1:                                           ; preds = %while.body
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %while.body
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %while.body
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %while.body
+  ret void
+
+sw.epilog:                                        ; preds = %while.body, %sw.bb3, %sw.bb2, %sw.bb1
+  %.str.4.sink = phi ptr [ @.str.4, %sw.bb3 ], [ @.str.3, %sw.bb2 ], [ @.str.2, %sw.bb1 ], [ @.str.1, %while.body ]
+  %num.addr.1 = phi i32 [ %num2, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %while.body ]
+  %call.i6 = tail call i32 @bar(ptr noundef nonnull %.str.4.sink)
+  br label %while.body
+}
+
+
+define void @test3(i32 noundef %num, i32 noundef %num2) {
+entry:
+  %add = add nsw i32 %num2, 40
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %sw.epilog
+  %num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
+  switch i32 %num.addr.0, label %sw.default [
+    i32 10, label %sw.bb
+    i32 30, label %sw.bb1
+    i32 20, label %sw.bb2
+    i32 40, label %sw.bb3
+  ]
+
+sw.bb:                                            ; preds = %while.body
+  %call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
+  br label %sw.epilog
+
+sw.bb1:                                           ; preds = %while.body
+  %call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.2)
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %while.body
+  %call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.3)
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %while.body
+  %call.i7 = tail call i32 @bar(ptr noundef nonnull @.str.4)
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %while.body
+  ret void
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+  %num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
+  br label %while.body
+}
+
+
+declare noundef i32 @foo()
+declare noundef i32 @bar(ptr nocapture noundef readonly)
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll
index ef8665b79690..bdd0c6f728ae 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-lifetime-ends.ll
@@ -6,15 +6,11 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; Tests to make sure no loads are introduced after a lifetime.end by multiply
 ; fusion.
 
-; FIXME: Currently the tests are mis-compiled, with loads being introduced after
-;       llvm.lifetime.end calls.
-
 define void @lifetime_for_first_arg_before_multiply(ptr noalias %B, ptr noalias %C) {
 ; CHECK-LABEL: @lifetime_for_first_arg_before_multiply(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
 ; CHECK-NEXT:    call void @init(ptr [[A]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
@@ -77,6 +73,7 @@ define void @lifetime_for_first_arg_before_multiply(ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
 ; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
 ; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -95,7 +92,6 @@ define void @lifetime_for_second_arg_before_multiply(ptr noalias %A, ptr noalias
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca <4 x double>, align 32
 ; CHECK-NEXT:    call void @init(ptr [[B]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
@@ -158,6 +154,7 @@ define void @lifetime_for_second_arg_before_multiply(ptr noalias %A, ptr noalias
 ; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
 ; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
 ; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -177,7 +174,6 @@ define void @lifetime_for_first_arg_before_multiply_load_from_offset(ptr noalias
 ; CHECK-NEXT:    [[A:%.*]] = alloca <8 x double>, align 64
 ; CHECK-NEXT:    call void @init(ptr [[A]])
 ; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[GEP_8]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
@@ -240,6 +236,7 @@ define void @lifetime_for_first_arg_before_multiply_load_from_offset(ptr noalias
 ; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
 ; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
 ; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -261,7 +258,6 @@ define void @lifetime_for_first_arg_before_multiply_lifetime_does_not_dominate(p
 ; CHECK-NEXT:    call void @init(ptr [[A]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
@@ -352,7 +348,6 @@ define void @lifetime_for_second_arg_before_multiply_lifetime_does_not_dominate(
 ; CHECK-NEXT:    call void @init(ptr [[B]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
@@ -441,10 +436,9 @@ define void @lifetime_for_ptr_first_arg_before_multiply(ptr noalias %A, ptr noal
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A:%.*]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
@@ -528,15 +522,104 @@ define void @lifetime_for_both_ptr_args_before_multiply(ptr noalias %A, ptr noal
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B:%.*]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A:%.*]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  br label %exit
+
+exit:
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @multiple_unrelated_lifetimes(ptr noalias %A, ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @multiple_unrelated_lifetimes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOC_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ALLOC_2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[ALLOC_1]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[ALLOC_2]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
@@ -597,13 +680,17 @@ define void @lifetime_for_both_ptr_args_before_multiply(ptr noalias %A, ptr noal
 ; CHECK-NEXT:    ret void
 ;
 entry:
+  %alloc.1 = alloca i32
+  %alloc.2 = alloca i32
   %a = load <4 x double>, ptr %A, align 8
   %b = load <4 x double>, ptr %B, align 8
   br i1 %c.0, label %then, label %exit
 
 then:
   call void @llvm.lifetime.end(i64 -1, ptr %B)
+  call void @llvm.lifetime.end(i64 -1, ptr %alloc.1)
   call void @llvm.lifetime.end(i64 -1, ptr %A)
+  call void @llvm.lifetime.end(i64 -1, ptr %alloc.2)
   br label %exit
 
 exit:
@@ -618,7 +705,6 @@ define void @lifetime_for_ptr_select_before_multiply(ptr noalias %A, ptr noalias
 ; CHECK-NEXT:    [[P:%.*]] = select i1 [[C_0:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]
 ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[P]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[P]], i64 0
@@ -701,6 +787,374 @@ exit:
   ret void
 }
 
+define void @lifetimes_for_args_in_different_blocks(ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetimes_for_args_in_different_blocks(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  br i1 %c.0, label %then, label %exit
+
+then:
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  br label %exit
+
+exit:
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  ret void
+}
+
+define void @lifetimes_for_args_in_different_blocks2(ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetimes_for_args_in_different_blocks2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr [[B:%.*]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  br i1 %c.0, label %then, label %exit
+
+then:
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  br label %exit
+
+exit:
+  %a = load <4 x double>, ptr %A, align 8
+  %b = load <4 x double>, ptr %B, align 8
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  ret void
+}
+
+define void @lifetimes_for_args_load0_in_different_block(ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetimes_for_args_load0_in_different_block(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  %a = load <4 x double>, ptr %A, align 8
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  br i1 %c.0, label %then, label %exit
+
+then:
+  %b = load <4 x double>, ptr %B, align 8
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  br label %exit
+
+exit:
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  ret void
+}
+
+define void @lifetimes_for_args_load1_in_different_block(ptr noalias %B, ptr noalias %C, i1 %c.0) {
+; CHECK-LABEL: @lifetimes_for_args_load1_in_different_block(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <4 x double>, align 32
+; CHECK-NEXT:    call void @init(ptr [[A]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK5]], <1 x double> [[SPLAT_SPLAT7]], <1 x double> [[TMP3]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul contract <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK12]], <1 x double> [[SPLAT_SPLAT14]], <1 x double> [[TMP9]])
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul contract <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK18]], <1 x double> [[SPLAT_SPLAT20]], <1 x double> [[TMP15]])
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP18]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul contract <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
+; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP21]])
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP24]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[C1:%.*]], i64 0
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[VEC_GEP28:%.*]] = getelementptr double, ptr [[TMP26]], i64 2
+; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[VEC_GEP28]], align 8
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca <4 x double>
+  call void @init(ptr %A)
+  %b = load <4 x double>, ptr %B, align 8
+  call void @llvm.lifetime.end(i64 -1, ptr %B)
+  br i1 %c.0, label %then, label %exit
+
+then:
+  %a = load <4 x double>, ptr %A, align 8
+  %m = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)
+  store <4 x double> %m, ptr %C, align 8
+  br label %exit
+
+exit:
+  call void @llvm.lifetime.end(i64 -1, ptr %A)
+  ret void
+}
+
 declare void @init(ptr)
 declare void @llvm.lifetime.end(i64, ptr)
 
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64.dylib b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64.dylib
new file mode 100755
index 000000000000..051e28f33d74
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64.dylib
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64_32.dylib b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64_32.dylib
new file mode 100755
index 000000000000..d3a339057abc
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64_32.dylib
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-relative-method-lists.test b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-relative-method-lists.test
new file mode 100644
index 000000000000..b1b96a41a329
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/macho-relative-method-lists.test
@@ -0,0 +1,86 @@
+RUN: llvm-objdump --macho --objc-meta-data    %p/Inputs/rel-method-lists-arm64_32.dylib | FileCheck %s --check-prefix=CHK32
+RUN: llvm-otool -ov                           %p/Inputs/rel-method-lists-arm64_32.dylib | FileCheck %s --check-prefix=CHK32
+
+RUN: llvm-objdump --macho --objc-meta-data    %p/Inputs/rel-method-lists-arm64.dylib    | FileCheck %s --check-prefix=CHK64
+RUN: llvm-otool -ov                           %p/Inputs/rel-method-lists-arm64.dylib    | FileCheck %s --check-prefix=CHK64
+
+CHK32:                 baseMethods 0x660 (struct method_list_t *)
+CHK32-NEXT:                 entsize 12 (relative)
+CHK32-NEXT:                   count 3
+CHK32-NEXT:                    name 0x144 (0x{{[0-9a-f]*}}) instance_method_00
+CHK32-NEXT:                   types 0x91 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffff18 (0x{{[0-9a-f]*}}) -[MyClass instance_method_00]
+CHK32-NEXT:                    name 0x13c (0x{{[0-9a-f]*}}) instance_method_01
+CHK32-NEXT:                   types 0x85 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffff28 (0x{{[0-9a-f]*}}) -[MyClass instance_method_01]
+CHK32-NEXT:                    name 0x134 (0x{{[0-9a-f]*}}) instance_method_02
+CHK32-NEXT:                   types 0x79 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffff38 (0x{{[0-9a-f]*}}) -[MyClass instance_method_02]
+
+CHK32:                 baseMethods 0x630 (struct method_list_t *)
+CHK32-NEXT:                 entsize 12 (relative)
+CHK32-NEXT:                   count 3
+CHK32-NEXT:                    name 0x180 (0x{{[0-9a-f]*}}) class_method_00
+CHK32-NEXT:                   types 0xc1 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffff9c (0x{{[0-9a-f]*}}) +[MyClass class_method_00]
+CHK32-NEXT:                    name 0x178 (0x{{[0-9a-f]*}}) class_method_01
+CHK32-NEXT:                   types 0xb5 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffffac (0x{{[0-9a-f]*}}) +[MyClass class_method_01]
+CHK32-NEXT:                    name 0x170 (0x{{[0-9a-f]*}}) class_method_02
+CHK32-NEXT:                   types 0xa9 (0x{{[0-9a-f]*}}) v8@0:4
+CHK32-NEXT:                     imp 0xffffffbc (0x{{[0-9a-f]*}}) +[MyClass class_method_02]
+
+CHK64:                  baseMethods 0x6e0 (struct method_list_t *)
+CHK64-NEXT:                  entsize 12 (relative)
+CHK64-NEXT:                    count 3
+CHK64-NEXT:                     name 0x188 (0x{{[0-9a-f]*}}) instance_method_00
+CHK64-NEXT:                    types 0x91 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffffa8 (0x{{[0-9a-f]*}}) -[MyClass instance_method_00]
+CHK64-NEXT:                     name 0x184 (0x{{[0-9a-f]*}}) instance_method_01
+CHK64-NEXT:                    types 0x85 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffffa0 (0x{{[0-9a-f]*}}) -[MyClass instance_method_01]
+CHK64-NEXT:                     name 0x180 (0x{{[0-9a-f]*}}) instance_method_02
+CHK64-NEXT:                    types 0x79 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffff98 (0x{{[0-9a-f]*}}) -[MyClass instance_method_02]
+
+CHK64:                  baseMethods 0x6b0 (struct method_list_t *)
+CHK64-NEXT:                  entsize 12 (relative)
+CHK64-NEXT:                    count 3
+CHK64-NEXT:                     name 0x1d0 (0x{{[0-9a-f]*}}) class_method_00
+CHK64-NEXT:                    types 0xc1 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffffe4 (0x{{[0-9a-f]*}}) +[MyClass class_method_00]
+CHK64-NEXT:                     name 0x1cc (0x{{[0-9a-f]*}}) class_method_01
+CHK64-NEXT:                    types 0xb5 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffffdc (0x{{[0-9a-f]*}}) +[MyClass class_method_01]
+CHK64-NEXT:                     name 0x1c8 (0x{{[0-9a-f]*}}) class_method_02
+CHK64-NEXT:                    types 0xa9 (0x{{[0-9a-f]*}}) v16@0:8
+CHK64-NEXT:                      imp 0xffffffd4 (0x{{[0-9a-f]*}}) +[MyClass class_method_02]
+
+######## Generate rel-method-lists-arm64.dylib ########
+// clang -c main.mm -o main.o -target arm64-apple-macos -arch arm64
+// ld64.ld64 -dylib -demangle -dynamic main.o -o rel-method-lists-arm64.dylib -syslibroot MacOSX14.2.sdk -segalign 0x10 -objc_relative_method_lists
+
+######## Generate rel-method-lists-arm64_32.dylib ########
+// clang -c main.mm -o main.o -target arm64_32-apple-watchos -arch arm64_32
+// ld64.ld64 -dylib -demangle -dynamic main.o -o rel-method-lists-arm64_32.dylib -syslibroot WatchOS.sdk -segalign 0x10 -objc_relative_method_lists
+
+// ~~~~~~~~~~~~~~~~~~~~~~~~~ main.mm ~~~~~~~~~~~~~~~~~~~~~~~~~
+__attribute__((objc_root_class))
+@interface MyClass
+- (void)instance_method_00;
+- (void)instance_method_01;
+- (void)instance_method_02;
++ (void)class_method_00;
++ (void)class_method_01;
++ (void)class_method_02;
+@end
+@implementation MyClass
+- (void)instance_method_00 {}
+- (void)instance_method_01 {}
+- (void)instance_method_02 {}
++ (void)class_method_00 {}
++ (void)class_method_01 {}
++ (void)class_method_02 {}
+@end
+void *_objc_empty_cache;
+void *_objc_empty_vtable;
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 0e6935c0ac58..1b0e5ba279d0 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -3661,6 +3661,10 @@ struct class_ro32_t {
 #define RO_ROOT (1 << 1)
 #define RO_HAS_CXX_STRUCTORS (1 << 2)
 
+/* Values for method_list{64,32}_t->entsize */
+#define ML_HAS_RELATIVE_PTRS (1 << 31)
+#define ML_ENTSIZE_MASK 0xFFFF
+
 struct method_list64_t {
   uint32_t entsize;
   uint32_t count;
@@ -3685,6 +3689,12 @@ struct method32_t {
   uint32_t imp;   /* IMP (32-bit pointer) */
 };
 
+struct method_relative_t {
+  int32_t name;  /* SEL (32-bit relative) */
+  int32_t types; /* const char * (32-bit relative) */
+  int32_t imp;   /* IMP (32-bit relative) */
+};
+
 struct protocol_list64_t {
   uint64_t count; /* uintptr_t (a 64-bit value) */
   /* struct protocol64_t * list[0];  These pointers follow inline */
@@ -3986,6 +3996,12 @@ inline void swapStruct(struct method32_t &m) {
   sys::swapByteOrder(m.imp);
 }
 
+inline void swapStruct(struct method_relative_t &m) {
+  sys::swapByteOrder(m.name);
+  sys::swapByteOrder(m.types);
+  sys::swapByteOrder(m.imp);
+}
+
 inline void swapStruct(struct protocol_list64_t &pl) {
   sys::swapByteOrder(pl.count);
 }
@@ -4440,6 +4456,84 @@ static void print_layout_map32(uint32_t p, struct DisassembleInfo *info) {
   print_layout_map(layout_map, left);
 }
 
+static void print_relative_method_list(uint32_t structSizeAndFlags,
+                                       uint32_t structCount, uint64_t p,
+                                       struct DisassembleInfo *info,
+                                       const char *indent,
+                                       uint32_t pointerBits) {
+  struct method_relative_t m;
+  const char *r, *name;
+  uint32_t offset, xoffset, left, i;
+  SectionRef S, xS;
+
+  assert(((structSizeAndFlags & ML_HAS_RELATIVE_PTRS) != 0) &&
+         "expected structSizeAndFlags to have ML_HAS_RELATIVE_PTRS flag");
+
+  outs() << indent << "\t\t   entsize "
+         << (structSizeAndFlags & ML_ENTSIZE_MASK) << " (relative) \n";
+  outs() << indent << "\t\t     count " << structCount << "\n";
+
+  for (i = 0; i < structCount; i++) {
+    r = get_pointer_64(p, offset, left, S, info);
+    memset(&m, '\0', sizeof(struct method_relative_t));
+    if (left < sizeof(struct method_relative_t)) {
+      memcpy(&m, r, left);
+      outs() << indent << "   (method_t extends past the end of the section)\n";
+    } else
+      memcpy(&m, r, sizeof(struct method_relative_t));
+    if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+      swapStruct(m);
+
+    outs() << indent << "\t\t      name " << format("0x%" PRIx32, m.name);
+    uint64_t relNameRefVA = p + offsetof(struct method_relative_t, name);
+    uint64_t absNameRefVA = relNameRefVA + m.name;
+    outs() << " (" << format("0x%" PRIx32, absNameRefVA) << ")";
+
+    // since this is a relative list, absNameRefVA is the address of the
+    // __objc_selrefs entry, so a pointer, not the actual name
+    const char *nameRefPtr =
+        get_pointer_64(absNameRefVA, xoffset, left, xS, info);
+    if (nameRefPtr) {
+      uint32_t pointerSize = pointerBits / CHAR_BIT;
+      if (left < pointerSize)
+        outs() << indent << " (nameRefPtr extends past the end of the section)";
+      else {
+        if (pointerSize == 64) {
+          name = get_pointer_64(*reinterpret_cast<const uint64_t *>(nameRefPtr),
+                                xoffset, left, xS, info);
+        } else {
+          name = get_pointer_32(*reinterpret_cast<const uint32_t *>(nameRefPtr),
+                                xoffset, left, xS, info);
+        }
+        if (name != nullptr)
+          outs() << format(" %.*s", left, name);
+      }
+    }
+    outs() << "\n";
+
+    outs() << indent << "\t\t     types " << format("0x%" PRIx32, m.types);
+    uint64_t relTypesVA = p + offsetof(struct method_relative_t, types);
+    uint64_t absTypesVA = relTypesVA + m.types;
+    outs() << " (" << format("0x%" PRIx32, absTypesVA) << ")";
+    name = get_pointer_32(absTypesVA, xoffset, left, xS, info);
+    if (name != nullptr)
+      outs() << format(" %.*s", left, name);
+    outs() << "\n";
+
+    outs() << indent << "\t\t       imp " << format("0x%" PRIx32, m.imp);
+    uint64_t relImpVA = p + offsetof(struct method_relative_t, imp);
+    uint64_t absImpVA = relImpVA + m.imp;
+    outs() << " (" << format("0x%" PRIx32, absImpVA) << ")";
+    name = GuessSymbolName(absImpVA, info->AddrMap);
+    if (name != nullptr)
+      outs() << " " << name;
+    outs() << "\n";
+
+    p += sizeof(struct method_relative_t);
+    offset += sizeof(struct method_relative_t);
+  }
+}
+
 static void print_method_list64_t(uint64_t p, struct DisassembleInfo *info,
                                   const char *indent) {
   struct method_list64_t ml;
@@ -4461,10 +4555,17 @@ static void print_method_list64_t(uint64_t p, struct DisassembleInfo *info,
     memcpy(&ml, r, sizeof(struct method_list64_t));
   if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
     swapStruct(ml);
+  p += sizeof(struct method_list64_t);
+
+  if ((ml.entsize & ML_HAS_RELATIVE_PTRS) != 0) {
+    print_relative_method_list(ml.entsize, ml.count, p, info, indent,
+                               /*pointerBits=*/64);
+    return;
+  }
+
   outs() << indent << "\t\t   entsize " << ml.entsize << "\n";
   outs() << indent << "\t\t     count " << ml.count << "\n";
 
-  p += sizeof(struct method_list64_t);
   offset += sizeof(struct method_list64_t);
   for (i = 0; i < ml.count; i++) {
     r = get_pointer_64(p, offset, left, S, info);
@@ -4552,10 +4653,17 @@ static void print_method_list32_t(uint64_t p, struct DisassembleInfo *info,
     memcpy(&ml, r, sizeof(struct method_list32_t));
   if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
     swapStruct(ml);
+  p += sizeof(struct method_list32_t);
+
+  if ((ml.entsize & ML_HAS_RELATIVE_PTRS) != 0) {
+    print_relative_method_list(ml.entsize, ml.count, p, info, indent,
+                               /*pointerBits=*/32);
+    return;
+  }
+
   outs() << indent << "\t\t   entsize " << ml.entsize << "\n";
   outs() << indent << "\t\t     count " << ml.count << "\n";
 
-  p += sizeof(struct method_list32_t);
   offset += sizeof(struct method_list32_t);
   for (i = 0; i < ml.count; i++) {
     r = get_pointer_32(p, offset, left, S, info);
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 8a070d256398..c591e5056480 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -17,6 +17,7 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Interfaces/ShapedOpInterfaces.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/ScopeExit.h"
@@ -220,6 +221,8 @@ void AffineDialect::initialize() {
 #include "mlir/Dialect/Affine/IR/AffineOps.cpp.inc"
                 >();
   addInterfaces<AffineInlinerInterface>();
+  declarePromisedInterfaces<ValueBoundsOpInterface, AffineApplyOp, AffineMaxOp,
+                            AffineMinOp>();
 }
 
 /// Materialize a single constant operation from a given attribute value with
diff --git a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
index 745c5706a838..6a593185cced 100644
--- a/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ArithDialect.cpp
@@ -8,9 +8,12 @@
 
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/UB/IR/UBOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/TypeSwitch.h"
 
@@ -46,6 +49,12 @@ void arith::ArithDialect::initialize() {
       >();
   addInterfaces<ArithInlinerInterface>();
   declarePromisedInterface<ArithDialect, ConvertToLLVMPatternInterface>();
+  declarePromisedInterface<SelectOp,
+                           bufferization::BufferDeallocationOpInterface>();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConstantOp,
+                            IndexCastOp, SelectOp>();
+  declarePromisedInterfaces<ValueBoundsOpInterface, AddIOp, ConstantOp, SubIOp,
+                            MulIOp>();
 }
 
 /// Materialize an integer or floating point constant.
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 34a0c594a5a5..2b226c7a1207 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -252,16 +252,6 @@ LogicalResult AllocTensorOp::verify() {
            << getType().getNumDynamicDims() << " dynamic sizes";
   if (getCopy() && getCopy().getType() != getType())
     return emitError("expected that `copy` and return type match");
-
-  // For sparse tensor allocation, we require that none of its
-  // uses escapes the function boundary directly.
-  if (sparse_tensor::getSparseTensorEncoding(getType())) {
-    for (auto &use : getOperation()->getUses())
-      if (isa<func::ReturnOp, func::CallOp, func::CallIndirectOp>(
-              use.getOwner()))
-        return emitError("sparse tensor allocation should not escape function");
-  }
-
   return success();
 }
 
diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
index d242d75bd51f..c6b02b9703e7 100644
--- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
+++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
@@ -10,6 +10,8 @@
 
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
@@ -69,6 +71,10 @@ void ControlFlowDialect::initialize() {
       >();
   addInterfaces<ControlFlowInlinerInterface>();
   declarePromisedInterface<ControlFlowDialect, ConvertToLLVMPatternInterface>();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface, BranchOp,
+                            CondBranchOp>();
+  declarePromisedInterface<CondBranchOp,
+                           bufferization::BufferDeallocationOpInterface>();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Func/IR/FuncOps.cpp b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
index d18ec279e85c..ed2ecfe9d0fb 100644
--- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp
+++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/IRMapping.h"
@@ -43,6 +44,8 @@ void FuncDialect::initialize() {
       >();
   declarePromisedInterface<FuncDialect, DialectInlinerInterface>();
   declarePromisedInterface<FuncDialect, ConvertToLLVMPatternInterface>();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface, CallOp,
+                            FuncOp, ReturnOp>();
 }
 
 /// Materialize a single constant operation from a given attribute value with
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 33ce5c159db4..a02eca8b1179 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -215,6 +216,8 @@ void GPUDialect::initialize() {
 #include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"
       >();
   addInterfaces<GPUInlinerInterface>();
+  declarePromisedInterface<TerminatorOp,
+                           bufferization::BufferDeallocationOpInterface>();
 }
 
 static std::string getSparseHandleKeyword(SparseHandleKind kind) {
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
index 027058d4de63..a6936fde4370 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
@@ -21,7 +21,10 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/Interfaces/DestinationStyleOpInterface.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/SubsetOpInterface.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Parser/Parser.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/InliningUtils.h"
@@ -125,6 +128,23 @@ void mlir::linalg::LinalgDialect::initialize() {
 #define GET_OP_LIST
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
                             >();
+  declarePromisedInterface<CopyOp, SubsetOpInterface>();
+  declarePromisedInterface<CopyOp, SubsetInsertionOpInterface>();
+  declarePromisedInterface<IndexOp, ValueBoundsOpInterface>();
+  declarePromisedInterface<linalg::GenericOp, TilingInterface>();
+  declarePromisedInterface<linalg::GenericOp, PartialReductionOpInterface>();
+  declarePromisedInterfaces<TilingInterface,
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+                            >();
+  declarePromisedInterfaces<PartialReductionOpInterface,
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+                            >();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface,
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+                            >();
 }
 
 LogicalResult LinalgDialect::verifyOperationAttribute(Operation *op,
diff --git a/mlir/lib/Dialect/MLProgram/IR/CMakeLists.txt b/mlir/lib/Dialect/MLProgram/IR/CMakeLists.txt
index 90c9c9aa7875..725bb5fd9da9 100644
--- a/mlir/lib/Dialect/MLProgram/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MLProgram/IR/CMakeLists.txt
@@ -15,5 +15,6 @@ add_mlir_dialect_library(MLIRMLProgramDialect
   MLIRControlFlowInterfaces
   MLIRFunctionInterfaces
   MLIRInferTypeOpInterface
+  MLIRTransforms
   MLIRIR
   )
diff --git a/mlir/lib/Dialect/MLProgram/IR/MLProgramDialect.cpp b/mlir/lib/Dialect/MLProgram/IR/MLProgramDialect.cpp
index 1a8fe208d409..bda1032ed988 100644
--- a/mlir/lib/Dialect/MLProgram/IR/MLProgramDialect.cpp
+++ b/mlir/lib/Dialect/MLProgram/IR/MLProgramDialect.cpp
@@ -8,6 +8,7 @@
 
 #include "mlir/Dialect/MLProgram/IR/MLProgram.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
@@ -24,6 +25,18 @@ using namespace mlir::ml_program;
 #include "mlir/Dialect/MLProgram/IR/MLProgramTypes.cpp.inc"
 
 namespace {
+
+struct MLProgramInlinerInterface : public DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+
+  bool isLegalToInline(Operation *, Region *, bool,
+                       IRMapping &) const override {
+    // We have no specific opinion on whether ops defined in this dialect should
+    // be inlined.
+    return true;
+  }
+};
+
 struct MLProgramOpAsmDialectInterface : public OpAsmDialectInterface {
   using OpAsmDialectInterface::OpAsmDialectInterface;
 
@@ -53,5 +66,5 @@ void ml_program::MLProgramDialect::initialize() {
 #include "mlir/Dialect/MLProgram/IR/MLProgramOps.cpp.inc"
       >();
 
-  addInterfaces<MLProgramOpAsmDialectInterface>();
+  addInterfaces<MLProgramInlinerInterface, MLProgramOpAsmDialectInterface>();
 }
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
index d71669a274b8..41082a85a485 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp
@@ -8,8 +8,13 @@
 
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/Interfaces/MemorySlotInterfaces.h"
+#include "mlir/Interfaces/RuntimeVerifiableOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include <optional>
 
@@ -43,6 +48,13 @@ void mlir::memref::MemRefDialect::initialize() {
       >();
   addInterfaces<MemRefInlinerInterface>();
   declarePromisedInterface<MemRefDialect, ConvertToLLVMPatternInterface>();
+  declarePromisedInterfaces<bufferization::AllocationOpInterface, AllocOp,
+                            AllocaOp, ReallocOp>();
+  declarePromisedInterfaces<RuntimeVerifiableOpInterface, CastOp, ExpandShapeOp,
+                            LoadOp, ReinterpretCastOp, StoreOp, SubViewOp>();
+  declarePromisedInterfaces<ValueBoundsOpInterface, AllocOp, AllocaOp, CastOp,
+                            DimOp, GetGlobalOp, RankOp, SubViewOp>();
+  declarePromisedInterface<MemRefType, DestructurableTypeInterface>();
 }
 
 /// Finds the unique dealloc operation (if one exists) for `allocValue`.
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index 233e702dbb22..ddb9676eb4f6 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -9,6 +9,8 @@
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
@@ -18,6 +20,7 @@
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/MapVector.h"
@@ -71,6 +74,12 @@ void SCFDialect::initialize() {
 #include "mlir/Dialect/SCF/IR/SCFOps.cpp.inc"
       >();
   addInterfaces<SCFInlinerInterface>();
+  declarePromisedInterfaces<bufferization::BufferDeallocationOpInterface,
+                            InParallelOp, ReduceReturnOp>();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface, ConditionOp,
+                            ExecuteRegionOp, ForOp, IfOp, IndexSwitchOp,
+                            ForallOp, InParallelOp, WhileOp, YieldOp>();
+  declarePromisedInterface<ForOp, ValueBoundsOpInterface>();
 }
 
 /// Default callback for IfOp builders. Inserts a yield without arguments.
diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp
index d9ee39a4e8dd..f5a3717f815d 100644
--- a/mlir/lib/Dialect/Shape/IR/Shape.cpp
+++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp
@@ -11,6 +11,7 @@
 #include "mlir/Dialect/Shape/IR/Shape.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/CommonFolders.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Traits.h"
@@ -143,6 +144,8 @@ void ShapeDialect::initialize() {
   // still evolving it makes it simple to start with an unregistered ops and
   // try different variants before actually defining the op.
   allowUnknownOperations();
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface, AssumingOp,
+                            AssumingYieldOp>();
 }
 
 Operation *ShapeDialect::materializeConstant(OpBuilder &builder,
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 7750efdd9add..6da51bb6b9ca 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
@@ -1956,6 +1957,10 @@ void SparseTensorDialect::initialize() {
 #define GET_OP_LIST
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.cpp.inc"
       >();
+  declarePromisedInterfaces<
+      bufferization::BufferizableOpInterface, ConcatenateOp, ConvertOp, LoadOp,
+      NewOp, NumberOfEntriesOp, AssembleOp, DisassembleOp,
+      ToCoordinatesBufferOp, ToCoordinatesOp, ToPositionsOp, ToValuesOp>();
 }
 
 #define GET_OP_CLASSES
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp
index 5b4395cc31a4..c370d104e098 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/StageSparseOperations.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
@@ -16,6 +17,37 @@ using namespace mlir::sparse_tensor;
 
 namespace {
 
+struct GuardSparseAlloc
+    : public OpRewritePattern<bufferization::AllocTensorOp> {
+  using OpRewritePattern<bufferization::AllocTensorOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(bufferization::AllocTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    // Only rewrite sparse allocations.
+    if (!getSparseTensorEncoding(op.getResult().getType()))
+      return failure();
+
+    // Only rewrite sparse allocations that escape the method
+    // without any chance of a finalizing operation in between.
+    // Here we assume that sparse tensor setup never crosses
+    // method boundaries. The current rewriting only repairs
+    // the most obvious allocate-call/return cases.
+    if (!llvm::all_of(op->getUses(), [](OpOperand &use) {
+          return isa<func::ReturnOp, func::CallOp, func::CallIndirectOp>(
+              use.getOwner());
+        }))
+      return failure();
+
+    // Guard escaping empty sparse tensor allocations with a finalizing
+    // operation that leaves the underlying storage in a proper state
+    // before the tensor escapes across the method boundary.
+    rewriter.setInsertionPointAfter(op);
+    auto load = rewriter.create<LoadOp>(op.getLoc(), op.getResult(), true);
+    rewriter.replaceAllUsesExcept(op, load, load);
+    return success();
+  }
+};
+
 template <typename StageWithSortOp>
 struct StageUnorderedSparseOps : public OpRewritePattern<StageWithSortOp> {
   using OpRewritePattern<StageWithSortOp>::OpRewritePattern;
@@ -37,6 +69,6 @@ struct StageUnorderedSparseOps : public OpRewritePattern<StageWithSortOp> {
 } // namespace
 
 void mlir::populateStageSparseOperationsPatterns(RewritePatternSet &patterns) {
-  patterns.add<StageUnorderedSparseOps<ConvertOp>,
+  patterns.add<GuardSparseAlloc, StageUnorderedSparseOps<ConvertOp>,
                StageUnorderedSparseOps<ConcatenateOp>>(patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
index 62032ff301be..5ca9510408c3 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
@@ -8,8 +8,11 @@
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Interfaces/SubsetOpInterface.h"
 #include "mlir/Transforms/InliningUtils.h"
 
 using namespace mlir;
@@ -45,4 +48,22 @@ void TensorDialect::initialize() {
 #include "mlir/Dialect/Tensor/IR/TensorOps.cpp.inc"
       >();
   addInterfaces<TensorInlinerInterface>();
+  declarePromisedInterfaces<
+      bufferization::BufferizableOpInterface, CastOp, CollapseShapeOp, DimOp,
+      EmptyOp, ExpandShapeOp, ExtractSliceOp, ExtractOp, FromElementsOp,
+      GenerateOp, InsertOp, InsertSliceOp, PadOp, ParallelInsertSliceOp, RankOp,
+      ReshapeOp, SplatOp>();
+  declarePromisedInterfaces<transform::FindPayloadReplacementOpInterface,
+                            CollapseShapeOp, ExpandShapeOp, ExtractSliceOp,
+                            InsertSliceOp, ReshapeOp>();
+  declarePromisedInterfaces<ReifyRankedShapedTypeOpInterface, ExpandShapeOp,
+                            CollapseShapeOp, PadOp>();
+  declarePromisedInterfaces<SubsetOpInterface, ExtractSliceOp, InsertSliceOp,
+                            ParallelInsertSliceOp>();
+  declarePromisedInterfaces<SubsetInsertionOpInterface, InsertSliceOp,
+                            ParallelInsertSliceOp>();
+  declarePromisedInterface<ExtractSliceOp, SubsetExtractionOpInterface>();
+  declarePromisedInterfaces<TilingInterface, PadOp, PackOp, UnPackOp>();
+  declarePromisedInterfaces<ValueBoundsOpInterface, CastOp, DimOp, EmptyOp,
+                            ExtractSliceOp, PadOp, RankOp>();
 }
diff --git a/mlir/lib/Dialect/Tosa/CMakeLists.txt b/mlir/lib/Dialect/Tosa/CMakeLists.txt
index ba5343dcd7ac..1911405c63cd 100644
--- a/mlir/lib/Dialect/Tosa/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/CMakeLists.txt
@@ -12,6 +12,7 @@ add_mlir_dialect_library(MLIRTosaDialect
   MLIRTosaDialectBytecodeIncGen
   MLIRTosaOpsIncGen
   MLIRTosaInterfacesIncGen
+  MLIRShardingInterfaceIncGen
 
   LINK_LIBS PUBLIC
   MLIRIR
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 62d07859e32f..f461e7e1a555 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
+#include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h"
 #include "mlir/Dialect/Quant/QuantOps.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
@@ -136,6 +137,14 @@ void TosaDialect::initialize() {
 #include "mlir/Dialect/Tosa/IR/TosaAttributes.cpp.inc"
       >();
   addInterfaces<TosaDialectBytecodeInterface, TosaInlinerInterface>();
+  declarePromisedInterfaces<
+      mesh::ShardingInterface, ClampOp, SigmoidOp, TanhOp, AddOp,
+      ArithmeticRightShiftOp, BitwiseAndOp, BitwiseOrOp, BitwiseXorOp, DivOp,
+      LogicalAndOp, LogicalLeftShiftOp, LogicalRightShiftOp, LogicalOrOp,
+      LogicalXorOp, MaximumOp, MinimumOp, MulOp, PowOp, SubOp, AbsOp,
+      BitwiseNotOp, CeilOp, ClzOp, ExpOp, FloorOp, LogOp, LogicalNotOp,
+      NegateOp, ReciprocalOp, RsqrtOp, SelectOp, EqualOp, GreaterOp,
+      GreaterEqualOp, MatMulOp>();
 }
 
 Operation *TosaDialect::materializeConstant(OpBuilder &builder, Attribute value,
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 75f6220ad8f3..35296824246e 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
@@ -31,6 +32,7 @@
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/SubsetOpInterface.h"
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/InliningUtils.h"
@@ -374,6 +376,14 @@ void VectorDialect::initialize() {
       >();
 
   addInterfaces<VectorInlinerInterface>();
+
+  declarePromisedInterfaces<bufferization::BufferizableOpInterface,
+                            TransferReadOp, TransferWriteOp, GatherOp, MaskOp,
+                            YieldOp>();
+  declarePromisedInterfaces<SubsetOpInterface, TransferReadOp,
+                            TransferWriteOp>();
+  declarePromisedInterface<TransferReadOp, SubsetExtractionOpInterface>();
+  declarePromisedInterface<TransferWriteOp, SubsetInsertionOpInterface>();
 }
 
 /// Materialize a single constant operation from a given attribute value with
@@ -6054,7 +6064,7 @@ LogicalResult MaskOp::fold(FoldAdaptor adaptor,
   maskableOp->dropAllUses();
   maskableOp->moveBefore(getOperation());
 
-  results.push_back(maskableOp->getResult(0));
+  llvm::append_range(results, maskableOp->getResults());
   return success();
 }
 
diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
index eaf9373e7616..e7e314f739fa 100644
--- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp
@@ -223,8 +223,10 @@ DebugTranslation::translateRecursive(DIRecursiveTypeAttrInterface attr) {
   }
 
   auto setRecursivePlaceholder = [&](llvm::DIType *placeholder) {
-    auto [iter, inserted] =
+    [[maybe_unused]] auto [iter, inserted] =
         recursiveTypeMap.try_emplace(recursiveId, placeholder);
+    (void)iter;
+    (void)inserted;
     assert(inserted && "illegal reuse of recursive id");
   };
 
diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir
index 83f8ef786154..4ebdb0a8f049 100644
--- a/mlir/test/Dialect/Bufferization/invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/invalid.mlir
@@ -26,29 +26,6 @@ func.func @alloc_tensor_copy_and_dims(%t: tensor<?xf32>, %sz: index) {
 
 // -----
 
-#DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
-
-func.func @sparse_alloc_direct_return() -> tensor<20x40xf32, #DCSR> {
-  // expected-error @+1{{sparse tensor allocation should not escape function}}
-  %0 = bufferization.alloc_tensor() : tensor<20x40xf32, #DCSR>
-  return %0 : tensor<20x40xf32, #DCSR>
-}
-
-// -----
-
-#DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
-
-func.func private @foo(tensor<20x40xf32, #DCSR>) -> ()
-
-func.func @sparse_alloc_call() {
-  // expected-error @+1{{sparse tensor allocation should not escape function}}
-  %0 = bufferization.alloc_tensor() : tensor<20x40xf32, #DCSR>
-  call @foo(%0) : (tensor<20x40xf32, #DCSR>) -> ()
-  return
-}
-
-// -----
-
 // expected-error @+1{{invalid value for 'bufferization.access'}}
 func.func private @invalid_buffer_access_type(tensor<*xf32> {bufferization.access = "foo"})
 
diff --git a/mlir/test/Dialect/MLProgram/inlining.mlir b/mlir/test/Dialect/MLProgram/inlining.mlir
new file mode 100644
index 000000000000..ac3677e1b928
--- /dev/null
+++ b/mlir/test/Dialect/MLProgram/inlining.mlir
@@ -0,0 +1,19 @@
+// RUN: mlir-opt %s -inline | FileCheck %s
+
+// Verifies that regions with operations from the ml_program dialect can
+// be inlined.
+
+ml_program.global private @global(dense<4> : tensor<4xi32>) : tensor<4xi32>
+
+// CHECK: @inline_into
+func.func @inline_into() -> tensor<4xi32> {
+  // CHECK-NOT: @inline_from
+  // CHECK: ml_program.global_load_const
+  %0 = call @inline_from() : () -> tensor<4xi32>
+  return %0 : tensor<4xi32>
+}
+
+func.func @inline_from() -> tensor<4xi32> {
+  %0 = ml_program.global_load_const @global : tensor<4xi32>
+  return %0 : tensor<4xi32>
+}
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 48f28ef390ed..18851f29d8ea 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -868,16 +868,6 @@ func.func @sparse_sort_coo_no_perm(%arg0: index, %arg1: memref<?xindex>) -> (mem
 
 // -----
 
-#CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}>
-
-func.func @sparse_alloc_escapes(%arg0: index) -> tensor<10x?xf64, #CSR> {
-  // expected-error@+1 {{sparse tensor allocation should not escape function}}
-  %0 = bufferization.alloc_tensor(%arg0) : tensor<10x?xf64, #CSR>
-  return %0: tensor<10x?xf64, #CSR>
-}
-
-// -----
-
 #UnorderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered))}>
 #OrderedCOOPerm = #sparse_tensor.encoding<{map = (d0, d1) -> (d1 : compressed(nonunique), d0 : singleton)}>
 
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 4c73a6271786..627ac54cf145 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -2483,6 +2483,18 @@ func.func @all_true_vector_mask(%a : vector<3x4xf32>) -> vector<3x4xf32> {
 
 // -----
 
+// CHECK-LABEL: func @all_true_vector_mask_no_result(
+func.func @all_true_vector_mask_no_result(%a : vector<3x4xf32>, %m : memref<3x4xf32>) {
+//   CHECK-NOT:   vector.mask
+//       CHECK:   vector.transfer_write
+  %c0 = arith.constant 0 : index
+  %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
+  vector.mask %all_true { vector.transfer_write %a, %m[%c0, %c0] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
+  return
+}
+
+// -----
+
 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask(
 // CHECK-SAME:     %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
 func.func @fold_shape_cast_with_mask(%arg0: tensor<1x?xf32>) -> vector<1x4xi1> {
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir
index 8ee4e1fb48fe..92c7039c8496 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir
@@ -1,6 +1,6 @@
 // DEFINE: %{compile} =  mlir-opt %s \
 // DEFINE:    -transform-interpreter -test-transform-dialect-erase-schedule \
-// DEFINE:    -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -test-lower-to-llvm -o %t
+// DEFINE:    -one-shot-bufferize="bufferize-function-boundaries" -buffer-deallocation-pipeline -cse -canonicalize -convert-vector-to-scf -test-lower-to-llvm -o %t
 // DEFINE: %{entry_point} = mmt4d
 // DEFINE: %{run} = mlir-cpu-runner %t -e %{entry_point} -entry-point-result=void \
 // DEFINE:    -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-matmul-masked-vec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-matmul-masked-vec.mlir
index 0378d638df61..a70d794506c4 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-matmul-masked-vec.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-matmul-masked-vec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule -one-shot-bufferize -func-bufferize -lower-vector-mask --test-lower-to-llvm | \
+// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule -one-shot-bufferize="bufferize-function-boundaries" -buffer-deallocation-pipeline -lower-vector-mask --test-lower-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_c_runner_utils,%mlir_runner_utils | \
 // RUN: FileCheck %s
 
diff --git a/mlir/test/Integration/Dialect/Memref/reinterpret-cast-runtime-verification.mlir b/mlir/test/Integration/Dialect/Memref/reinterpret-cast-runtime-verification.mlir
index 370029154054..2239ba50b662 100644
--- a/mlir/test/Integration/Dialect/Memref/reinterpret-cast-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Memref/reinterpret-cast-runtime-verification.mlir
@@ -34,7 +34,8 @@ func.func @main() {
   %5 = arith.constant 5 : index
 
   %alloca_1 = memref.alloca() : memref<1xf32>
-  %alloc_4 = memref.alloc(%4) : memref<?xf32>
+  %alloca_4 = memref.alloca() : memref<4xf32>
+  %alloca_4_dyn = memref.cast %alloca_4 : memref<4xf32> to memref<?xf32>
 
   // Offset is out-of-bounds
   //      CHECK: ERROR: Runtime op verification failed
@@ -55,20 +56,20 @@ func.func @main() {
   // CHECK-NEXT: "memref.reinterpret_cast"(%{{.*}})
   // CHECK-NEXT: ^ result of reinterpret_cast is out-of-bounds of the base memref
   // CHECK-NEXT: Location: loc({{.*}})
-  func.call @reinterpret_cast_fully_dynamic(%alloc_4, %0, %5, %1) : (memref<?xf32>, index, index, index) -> ()
+  func.call @reinterpret_cast_fully_dynamic(%alloca_4_dyn, %0, %5, %1) : (memref<?xf32>, index, index, index) -> ()
 
   // Stride is out-of-bounds
   //      CHECK: ERROR: Runtime op verification failed
   // CHECK-NEXT: "memref.reinterpret_cast"(%{{.*}})
   // CHECK-NEXT: ^ result of reinterpret_cast is out-of-bounds of the base memref
   // CHECK-NEXT: Location: loc({{.*}})
-  func.call @reinterpret_cast_fully_dynamic(%alloc_4, %0, %4, %4) : (memref<?xf32>, index, index, index) -> ()
+  func.call @reinterpret_cast_fully_dynamic(%alloca_4_dyn, %0, %4, %4) : (memref<?xf32>, index, index, index) -> ()
 
   //  CHECK-NOT: ERROR: Runtime op verification failed
   func.call @reinterpret_cast(%alloca_1, %0) : (memref<1xf32>, index) -> ()
 
   //  CHECK-NOT: ERROR: Runtime op verification failed
-  func.call @reinterpret_cast_fully_dynamic(%alloc_4, %0, %4, %1) : (memref<?xf32>, index, index, index) -> ()
+  func.call @reinterpret_cast_fully_dynamic(%alloca_4_dyn, %0, %4, %1) : (memref<?xf32>, index, index, index) -> ()
 
   return
 }
diff --git a/mlir/test/Integration/Dialect/Memref/subview-runtime-verification.mlir b/mlir/test/Integration/Dialect/Memref/subview-runtime-verification.mlir
index 48987ce216f1..3ccf8b1be6d7 100644
--- a/mlir/test/Integration/Dialect/Memref/subview-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Memref/subview-runtime-verification.mlir
@@ -38,14 +38,15 @@ func.func @main() {
   %5 = arith.constant 5 : index
 
   %alloca = memref.alloca() : memref<1xf32>
-  %alloc = memref.alloc(%4) : memref<?x4xf32>
+  %alloca_4 = memref.alloca() : memref<4x4xf32>
+  %alloca_4_dyn = memref.cast %alloca_4 : memref<4x4xf32> to memref<?x4xf32>
 
   // Offset is out-of-bounds
   //      CHECK: ERROR: Runtime op verification failed
   // CHECK-NEXT: "memref.subview"
   // CHECK-NEXT: ^ subview is out-of-bounds of the base memref
   // CHECK-NEXT: Location: loc({{.*}})
-  func.call @subview_dynamic_rank_reduce(%alloc, %5, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
+  func.call @subview_dynamic_rank_reduce(%alloca_4_dyn, %5, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
 
   // Offset is out-of-bounds
   //      CHECK: ERROR: Runtime op verification failed
@@ -66,23 +67,23 @@ func.func @main() {
   // CHECK-NEXT: "memref.subview"
   // CHECK-NEXT: ^ subview is out-of-bounds of the base memref
   // CHECK-NEXT: Location: loc({{.*}})
-  func.call @subview_dynamic(%alloc, %0, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
+  func.call @subview_dynamic(%alloca_4_dyn, %0, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
 
   // Stride is out-of-bounds
   //      CHECK: ERROR: Runtime op verification failed
   // CHECK-NEXT: "memref.subview"
   // CHECK-NEXT: ^ subview is out-of-bounds of the base memref
   // CHECK-NEXT: Location: loc({{.*}})
-  func.call @subview_dynamic(%alloc, %0, %4, %4) : (memref<?x4xf32>, index, index, index) -> ()
+  func.call @subview_dynamic(%alloca_4_dyn, %0, %4, %4) : (memref<?x4xf32>, index, index, index) -> ()
 
   // CHECK-NOT: ERROR: Runtime op verification failed
   func.call @subview(%alloca, %0) : (memref<1xf32>, index) -> ()
 
   // CHECK-NOT: ERROR: Runtime op verification failed
-  func.call @subview_dynamic(%alloc, %0, %4, %1) : (memref<?x4xf32>, index, index, index) -> ()
+  func.call @subview_dynamic(%alloca_4_dyn, %0, %4, %1) : (memref<?x4xf32>, index, index, index) -> ()
 
   // CHECK-NOT: ERROR: Runtime op verification failed
-  func.call @subview_dynamic_rank_reduce(%alloc, %0, %1, %0) : (memref<?x4xf32>, index, index, index) -> ()
+  func.call @subview_dynamic_rank_reduce(%alloca_4_dyn, %0, %1, %0) : (memref<?x4xf32>, index, index, index) -> ()
 
 
   return
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_empty.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_empty.mlir
new file mode 100755
index 000000000000..bcd71f7bd674
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_empty.mlir
@@ -0,0 +1,144 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+//  do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
+// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation and VLA vectorization.
+// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
+
+
+#map = affine_map<(d0) -> (d0)>
+
+#SV  = #sparse_tensor.encoding<{
+  map = (d0) -> (d0 : compressed)
+}>
+
+module {
+
+  // This directly yields an empty sparse vector.
+  func.func @empty() -> tensor<10xf32, #SV> {
+    %0 = tensor.empty() : tensor<10xf32, #SV>
+    return %0 : tensor<10xf32, #SV>
+  }
+
+  // This also directly yields an empty sparse vector.
+  func.func @empty_alloc() -> tensor<10xf32, #SV> {
+    %0 = bufferization.alloc_tensor() : tensor<10xf32, #SV>
+    return %0 : tensor<10xf32, #SV>
+  }
+
+  // This yields a hidden empty sparse vector (all zeros).
+  func.func @zeros() -> tensor<10xf32, #SV> {
+    %cst = arith.constant 0.0 : f32
+    %0 = bufferization.alloc_tensor() : tensor<10xf32, #SV>
+    %1 = linalg.generic {
+        indexing_maps = [#map],
+	iterator_types = ["parallel"]}
+      outs(%0 : tensor<10xf32, #SV>) {
+         ^bb0(%out: f32):
+            linalg.yield %cst : f32
+    } -> tensor<10xf32, #SV>
+    return %1 : tensor<10xf32, #SV>
+  }
+
+  // This yields a filled sparse vector (all ones).
+  func.func @ones() -> tensor<10xf32, #SV> {
+    %cst = arith.constant 1.0 : f32
+    %0 = bufferization.alloc_tensor() : tensor<10xf32, #SV>
+    %1 = linalg.generic {
+        indexing_maps = [#map],
+	iterator_types = ["parallel"]}
+      outs(%0 : tensor<10xf32, #SV>) {
+         ^bb0(%out: f32):
+            linalg.yield %cst : f32
+    } -> tensor<10xf32, #SV>
+    return %1 : tensor<10xf32, #SV>
+  }
+
+  //
+  // Main driver.
+  //
+  func.func @main() {
+
+    %0 = call @empty()       : () -> tensor<10xf32, #SV>
+    %1 = call @empty_alloc() : () -> tensor<10xf32, #SV>
+    %2 = call @zeros()       : () -> tensor<10xf32, #SV>
+    %3 = call @ones()        : () -> tensor<10xf32, #SV>
+
+    //
+    // Verify the output. In particular, make sure that
+    // all empty sparse vector data structures are properly
+    // finalized with a pair (0,0) for positions.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 0
+    // CHECK-NEXT: dim = ( 10 )
+    // CHECK-NEXT: lvl = ( 10 )
+    // CHECK-NEXT: pos[0] : ( 0, 0,
+    // CHECK-NEXT: crd[0] : (
+    // CHECK-NEXT: values : (
+    // CHECK-NEXT: ----
+    //
+    // CHECK-NEXT: ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 0
+    // CHECK-NEXT: dim = ( 10 )
+    // CHECK-NEXT: lvl = ( 10 )
+    // CHECK-NEXT: pos[0] : ( 0, 0,
+    // CHECK-NEXT: crd[0] : (
+    // CHECK-NEXT: values : (
+    // CHECK-NEXT: ----
+    //
+    // CHECK-NEXT: ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 0
+    // CHECK-NEXT: dim = ( 10 )
+    // CHECK-NEXT: lvl = ( 10 )
+    // CHECK-NEXT: pos[0] : ( 0, 0,
+    // CHECK-NEXT: crd[0] : (
+    // CHECK-NEXT: values : (
+    // CHECK-NEXT: ----
+    //
+    // CHECK-NEXT: ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 10
+    // CHECK-NEXT: dim = ( 10 )
+    // CHECK-NEXT: lvl = ( 10 )
+    // CHECK-NEXT: pos[0] : ( 0, 10,
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+    // CHECK-NEXT: values : ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %0 : tensor<10xf32, #SV>
+    sparse_tensor.print %1 : tensor<10xf32, #SV>
+    sparse_tensor.print %2 : tensor<10xf32, #SV>
+    sparse_tensor.print %3 : tensor<10xf32, #SV>
+
+    bufferization.dealloc_tensor %0 : tensor<10xf32, #SV>
+    bufferization.dealloc_tensor %1 : tensor<10xf32, #SV>
+    bufferization.dealloc_tensor %2 : tensor<10xf32, #SV>
+    bufferization.dealloc_tensor %3 : tensor<10xf32, #SV>
+    return
+  }
+}
diff --git a/mlir/test/Integration/Dialect/Tosa/CPU/test-fully-connected.mlir b/mlir/test/Integration/Dialect/Tosa/CPU/test-fully-connected.mlir
index bf178c826574..258b1b4f2fab 100644
--- a/mlir/test/Integration/Dialect/Tosa/CPU/test-fully-connected.mlir
+++ b/mlir/test/Integration/Dialect/Tosa/CPU/test-fully-connected.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith))" | \
-// RUN: mlir-opt -one-shot-bufferize -func-bufferize -test-lower-to-llvm | \
+// RUN: mlir-opt -one-shot-bufferize="bufferize-function-boundaries" -buffer-deallocation-pipeline -test-lower-to-llvm | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 9645ab2a6726..118bb7d3051b 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -1393,7 +1393,7 @@ protected:
 };
 
 /// A static check on whether or not we support RPC in libomptarget.
-const bool libomptargetSupportsRPC();
+bool libomptargetSupportsRPC();
 
 } // namespace plugin
 } // namespace target
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 8c8c52de0c21..414506f30b8f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1560,7 +1560,7 @@ Expected<bool> GenericPluginTy::checkELFImage(StringRef Image) const {
   return isELFCompatible(Image);
 }
 
-const bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
+bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
 #ifdef LIBOMPTARGET_RPC_SUPPORT
   return true;
 #else
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 569a1ab9b477..885d6636abe4 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1402,19 +1402,9 @@ extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
 // subleaf is only needed for cache and topology discovery and can be set to
 // zero in most cases
 static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
-#if KMP_ARCH_X86 && (defined(__pic__) || defined(__PIC__))
-  // on i386 arch, the ebx reg. is used by pic, thus we need to preserve from
-  // being trashed beforehand
-  __asm__ __volatile__("mov %%ebx, %%edi\n"
-                       "cpuid\n"
-                       "xchg %%edi, %%ebx\n"
-                       : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
-                       : "a"(leaf), "c"(subleaf));
-#else
   __asm__ __volatile__("cpuid"
                        : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
                        : "a"(leaf), "c"(subleaf));
-#endif
 }
 // Load p into FPU control word
 static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 073353a89c89..bb054b648102 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -528,6 +528,7 @@ libc_support_library(
         ":__support_cpp_type_traits",
         ":__support_ctype_utils",
         ":__support_str_to_num_result",
+        ":__support_uint128",
         ":errno",
     ],
 )
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c14869fe3867..0c67f331a5b6 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -3889,6 +3889,7 @@ cc_library(
         ":ControlFlowInterfaces",
         ":DialectUtils",
         ":IR",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":MemRefDialect",
         ":ShapedOpInterfaces",
@@ -4296,6 +4297,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":MemRefDialect",
         ":ParallelCombiningOpInterface",
@@ -4367,6 +4369,18 @@ cc_library(
 )
 
 cc_library(
+    name = "InliningUtils",
+    srcs = ["lib/Transforms/Utils/InliningUtils.cpp"],
+    hdrs = ["include/mlir/Transforms/InliningUtils.h"],
+    includes = ["include"],
+    deps = [
+        ":CallOpInterfaces",
+        ":IR",
+        "//llvm:Support",
+    ],
+)
+
+cc_library(
     name = "LoopLikeInterface",
     srcs = ["lib/Interfaces/LoopLikeInterface.cpp"],
     hdrs = ["include/mlir/Interfaces/LoopLikeInterface.h"],
@@ -4543,6 +4557,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MLIRShapeCanonicalizationIncGen",
         ":ShapeOpsIncGen",
         ":SideEffectInterfaces",
@@ -4706,6 +4721,7 @@ cc_library(
         ":ControlFlowOpsIncGen",
         ":ConvertToLLVMInterface",
         ":IR",
+        ":InliningUtils",
         ":SideEffectInterfaces",
         ":Support",
         "//llvm:Support",
@@ -4742,7 +4758,7 @@ cc_library(
     hdrs = glob([
         "include/mlir/Dialect/Func/IR/*.h",
         "include/mlir/Dialect/Func/Utils/*.h",
-    ]) + ["include/mlir/Transforms/InliningUtils.h"],
+    ]),
     includes = ["include"],
     deps = [
         ":ArithDialect",
@@ -4756,6 +4772,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":SideEffectInterfaces",
         ":Support",
         "//llvm:Support",
@@ -4772,6 +4789,7 @@ cc_library(
         ":FuncDialect",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MeshShardingInterface",
     ],
 )
@@ -4928,6 +4946,7 @@ cc_library(
         ":DialectUtils",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MaskableOpInterface",
         ":MaskingOpInterface",
         ":MemRefDialect",
@@ -5344,7 +5363,6 @@ cc_library(
             "include/mlir/Dialect/LLVMIR/*X86Vector*.h",
         ],
     ) + [
-        "include/mlir/Transforms/InliningUtils.h",
         "include/mlir/Transforms/Mem2Reg.h",
     ],
     includes = ["include"],
@@ -5355,6 +5373,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LLVMDialectInterfaceIncGen",
         ":LLVMIntrinsicOpsIncGen",
         ":LLVMOpsIncGen",
@@ -5561,6 +5580,7 @@ cc_library(
         ":IR",
         ":InferIntRangeInterface",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LLVMDialect",
         ":MemRefDialect",
         ":SCFDialect",
@@ -6895,7 +6915,7 @@ cc_library(
     srcs = glob([
         "lib/Dialect/SPIRV/IR/*.cpp",
         "lib/Dialect/SPIRV/IR/*.h",
-    ]) + ["include/mlir/Transforms/InliningUtils.h"],
+    ]),
     hdrs = glob([
         "include/mlir/Dialect/SPIRV/IR/*.h",
     ]),
@@ -6907,6 +6927,7 @@ cc_library(
         ":GPUDialect",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":Parser",
         ":SPIRVAttrUtilsGen",
         ":SPIRVAttributesIncGen",
@@ -7311,7 +7332,6 @@ gentbl_cc_library(
 cc_library(
     name = "TensorDialect",
     srcs = [
-        "include/mlir/Transforms/InliningUtils.h",
         "lib/Dialect/Tensor/IR/TensorDialect.cpp",
         "lib/Dialect/Tensor/IR/TensorOps.cpp",
         "lib/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.cpp",
@@ -7332,6 +7352,7 @@ cc_library(
         ":DialectUtils",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":ParallelCombiningOpInterface",
         ":ShapedOpInterfaces",
@@ -7522,15 +7543,19 @@ cc_library(
 
 cc_library(
     name = "TransformUtils",
-    srcs = glob([
-        "lib/Transforms/Utils/*.cpp",
-        "lib/Transforms/Utils/*.h",
-    ]),
+    srcs = glob(
+        include = [
+            "lib/Transforms/Utils/*.cpp",
+            "lib/Transforms/Utils/*.h",
+        ],
+        exclude = ["lib/Transforms/Utils/InliningUtils.cpp"],
+    ),
     hdrs = glob(
-        [
-            "include/mlir/Transforms/*.h",
+        include = ["include/mlir/Transforms/*.h"],
+        exclude = [
+            "include/mlir/Transforms/InliningUtils.h",
+            "include/mlir/Transforms/Passes.h",
         ],
-        exclude = ["include/mlir/Transforms/Passes.h"],
     ),
     includes = ["include"],
     deps = [
@@ -7538,6 +7563,7 @@ cc_library(
         ":ControlFlowInterfaces",
         ":FunctionInterfaces",
         ":IR",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":MemorySlotInterfaces",
         ":Pass",
@@ -7867,6 +7893,7 @@ cc_library(
         ":ControlFlowInterfaces",
         ":FunctionInterfaces",
         ":IR",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":MemorySlotInterfaces",
         ":Pass",
@@ -10963,6 +10990,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LinalgEnumsIncGen",
         ":LinalgInterfacesIncGen",
         ":LinalgNamedStructuredOpsYamlIncGen",
@@ -11709,6 +11737,7 @@ cc_library(
         ":FuncDialect",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":LoopLikeInterface",
         ":MeshDialect",
         ":MeshShardingInterface",
@@ -12323,6 +12352,7 @@ cc_library(
         ":ConvertToLLVMInterface",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":SideEffectInterfaces",
         "//llvm:Support",
     ],
@@ -12574,7 +12604,6 @@ cc_library(
     ],
     hdrs = [
         "include/mlir/Dialect/Arith/IR/Arith.h",
-        "include/mlir/Transforms/InliningUtils.h",
     ],
     includes = ["include"],
     deps = [
@@ -12589,6 +12618,7 @@ cc_library(
         ":InferIntRangeCommon",
         ":InferIntRangeInterface",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":Support",
         ":UBDialect",
         ":VectorInterfaces",
@@ -12750,6 +12780,7 @@ cc_library(
         ":ConvertToLLVMInterface",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MathBaseIncGen",
         ":MathOpsIncGen",
         ":SideEffectInterfaces",
@@ -12898,6 +12929,7 @@ cc_library(
         ":DialectUtils",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MemRefBaseIncGen",
         ":MemRefOpsIncGen",
         ":MemorySlotInterfaces",
@@ -13521,6 +13553,7 @@ cc_library(
         ":FunctionInterfaces",
         ":IR",
         ":InferTypeOpInterface",
+        ":InliningUtils",
         ":MemRefDialect",
         ":SparseTensorDialect",
         ":SubsetOpInterface",
@@ -13918,7 +13951,6 @@ gentbl_cc_library(
 cc_library(
     name = "UBDialect",
     srcs = [
-        "include/mlir/Transforms/InliningUtils.h",
         "lib/Dialect/UB/IR/UBOps.cpp",
     ],
     hdrs = ["include/mlir/Dialect/UB/IR/UBOps.h"],
@@ -13926,6 +13958,7 @@ cc_library(
     deps = [
         ":ConvertToLLVMInterface",
         ":IR",
+        ":InliningUtils",
         ":SideEffectInterfaces",
         ":UBOpsIncGen",
         ":UBOpsInterfacesIncGen",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 91706af935ac..ccfef3f24340 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -399,6 +399,7 @@ cc_library(
         "//mlir:IR",
         "//mlir:InferIntRangeInterface",
         "//mlir:InferTypeOpInterface",
+        "//mlir:InliningUtils",
         "//mlir:LLVMDialect",
         "//mlir:LinalgDialect",
         "//mlir:LoopLikeInterface",
@@ -407,7 +408,6 @@ cc_library(
         "//mlir:SideEffectInterfaces",
         "//mlir:Support",
         "//mlir:TensorDialect",
-        "//mlir:TransformUtils",
         "//mlir:Transforms",
         "//mlir:ViewLikeInterface",
     ],
@@ -532,7 +532,7 @@ cc_library(
         "//mlir:PDLInterpDialect",
         "//mlir:Pass",
         "//mlir:Support",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -570,7 +570,7 @@ cc_library(
         "//mlir:SCFDialect",
         "//mlir:SPIRVDialect",
         "//mlir:Support",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -600,7 +600,7 @@ cc_library(
         "//mlir:Pass",
         "//mlir:SCFDialect",
         "//mlir:SCFTransforms",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -695,7 +695,6 @@ cc_library(
         "//mlir:SCFToControlFlow",
         "//mlir:SPIRVDialect",
         "//mlir:ToLLVMIRTranslation",
-        "//mlir:TransformUtils",
         "//mlir:Transforms",
         "//mlir:VectorDialect",
         "//mlir:VectorToLLVM",
@@ -728,7 +727,6 @@ cc_library(
         "//mlir:SCFTransforms",
         "//mlir:TensorDialect",
         "//mlir:TensorTransforms",
-        "//mlir:TransformUtils",
         "//mlir:Transforms",
         "//mlir:VectorDialect",
         "//mlir:VectorToSCF",
@@ -770,7 +768,7 @@ cc_library(
         "//mlir:MathTransforms",
         "//mlir:Pass",
         "//mlir:SCFDialect",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
         "//mlir:VectorDialect",
         "//mlir:X86VectorDialect",
     ],
@@ -786,7 +784,7 @@ cc_library(
         "//mlir:IR",
         "//mlir:MathDialect",
         "//mlir:Pass",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
         "//mlir:VCIXDialect",
         "//mlir:VectorDialect",
     ],
@@ -850,7 +848,7 @@ cc_library(
         "//mlir:Pass",
         "//mlir:SCFDialect",
         "//mlir:Support",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -869,7 +867,7 @@ cc_library(
         "//mlir:SCFDialect",
         "//mlir:SCFTransforms",
         "//mlir:SCFUtils",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -994,7 +992,7 @@ cc_library(
         "//mlir:FuncTransforms",
         "//mlir:IR",
         "//mlir:Pass",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
 
@@ -1033,7 +1031,7 @@ cc_library(
         "//mlir:SCFDialect",
         "//mlir:Support",
         "//mlir:TensorDialect",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
         "//mlir:VectorDialect",
         "//mlir:VectorToSCF",
         "//mlir:VectorTransforms",
@@ -1119,6 +1117,6 @@ cc_library(
         "//mlir:Parser",
         "//mlir:Pass",
         "//mlir:Support",
-        "//mlir:TransformUtils",
+        "//mlir:Transforms",
     ],
 )
author	Fangrui Song <i@maskray.me>	2024-03-16 14:24:50 -0700
committer	Fangrui Song <i@maskray.me>	2024-03-16 14:24:50 -0700
commit	3aa7d5b9726ba51fc943ad80c11369264806fcce (patch)
tree	32beb24493d323a3264e32ae5733011dd7530f1a
parent	0fd3675447f41b033472f9269648e8e735bc543a (diff)
parent	84b5178124e47f6019b56c04abcfb978a94b1c3c (diff)