[𝘀𝗽𝗿] changes introduced through rebaseusers/vitalybuka/spr/main.lsan-fix-compilation-on-android

Created using spr 1.3.4 [skip ci]
author: Vitaly Buka <vitalybuka@google.com> 2024-10-18 16:36:18 -0700
committer: Vitaly Buka <vitalybuka@google.com> 2024-10-18 16:36:18 -0700
commit: 3f695d789826bc19057a2e758e9bebfde2678fde (patch)
tree: 0a159d1148f00705fad6c03b5071f9f1cc3262cf
parent: 9a4661cf31ea41143ee1c5a926a75320f91b1783 (diff)
parent: f5bd36aece8f6b12422ce30903dd78d1b5006efd (diff)
230 files changed, 16893 insertions, 15044 deletions
diff --git a/.gitattributes b/.gitattributes
index aced01d485c1..6b281f33f737 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,10 +1,3 @@
-# Checkout as native, commit as LF except in specific circumstances
-* text=auto
-*.bat text eol=crlf
-*.rc text eol=crlf
-*.sln text eol=crlf
-*.natvis text eol=crlf
-
 libcxx/src/**/*.cpp     merge=libcxx-reformat
 libcxx/include/**/*.h   merge=libcxx-reformat
 
diff --git a/clang-tools-extra/clangd/test/.gitattributes b/clang-tools-extra/clangd/test/.gitattributes
deleted file mode 100644
index 20971adc2b5d..000000000000
--- a/clang-tools-extra/clangd/test/.gitattributes
+++ /dev/null
@@ -1,3 +0,0 @@
-input-mirror.test text eol=crlf
-too_large.test text eol=crlf
-protocol.test text eol=crlf
diff --git a/clang-tools-extra/clangd/test/input-mirror.test b/clang-tools-extra/clangd/test/input-mirror.test
index bce3f9923a3b..a34a4a08cf60 100644
--- a/clang-tools-extra/clangd/test/input-mirror.test
+++ b/clang-tools-extra/clangd/test/input-mirror.test
@@ -1,17 +1,17 @@
-# RUN: clangd -pretty -sync -input-mirror-file %t < %s
-# Note that we have to use '-b' as -input-mirror-file does not have a newline at the end of file.
-# RUN: diff -b %t %s
-# It is absolutely vital that this file has CRLF line endings.
-#
-Content-Length: 125
-
-{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
-Content-Length: 172
-
-{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"int main() {\nint a;\na;\n}\n"}}}
-Content-Length: 44
-
-{"jsonrpc":"2.0","id":3,"method":"shutdown"}
-Content-Length: 33
-
-{"jsonrpc":"2.0","method":"exit"}
+# RUN: clangd -pretty -sync -input-mirror-file %t < %s
+# Note that we have to use '-b' as -input-mirror-file does not have a newline at the end of file.
+# RUN: diff -b %t %s
+# It is absolutely vital that this file has CRLF line endings.
+#
+Content-Length: 125
+
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
+Content-Length: 172
+
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"int main() {\nint a;\na;\n}\n"}}}
+Content-Length: 44
+
+{"jsonrpc":"2.0","id":3,"method":"shutdown"}
+Content-Length: 33
+
+{"jsonrpc":"2.0","method":"exit"}
diff --git a/clang-tools-extra/clangd/test/protocol.test b/clang-tools-extra/clangd/test/protocol.test
index 64ccfaef1891..5e852d1d9dee 100644
--- a/clang-tools-extra/clangd/test/protocol.test
+++ b/clang-tools-extra/clangd/test/protocol.test
@@ -1,113 +1,113 @@
-# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s | FileCheck -strict-whitespace %s
-# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s 2>&1 | FileCheck -check-prefix=STDERR %s
-# vim: fileformat=dos
-# It is absolutely vital that this file has CRLF line endings.
-#
-# Note that we invert the test because we intent to let clangd exit prematurely.
-#
-# Test protocol parsing
-Content-Length: 125
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-
-{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
-# Test message with Content-Type after Content-Length
-#
-#      CHECK:  "jsonrpc": "2.0",
-# CHECK-NEXT:  "result": {
-#      CHECK:  }
-Content-Length: 246
-
-{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct fake { int a, bb, ccc; int f(int i, const float f) const; };\nint main() {\n  fake f;\n  f.\n}\n"}}}
-
-Content-Length: 104
-
-{"jsonrpc":"2.0","method":"textDocument/didChange","params":{"textDocument":{"uri":"test:///main.cpp"}}}
-
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-Content-Length: 146
-
-{"jsonrpc":"2.0","id":1,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-# Test message with Content-Type before Content-Length
-#
-#      CHECK:  "id": 1,
-# CHECK-NEXT:  "jsonrpc": "2.0",
-# CHECK-NEXT:  "result": {
-# CHECK-NEXT:    "isIncomplete": false,
-# CHECK-NEXT:    "items": [
-#      CHECK:        "filterText": "a",
-# CHECK-NEXT:        "insertText": "a",
-# CHECK-NEXT:        "insertTextFormat": 1,
-# CHECK-NEXT:        "kind": 5,
-# CHECK-NEXT:        "label": " a",
-# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
-# CHECK-NEXT:        "sortText": "{{.*}}"
-#      CHECK:    ]
-# CHECK-NEXT:  }
-
-X-Test: Testing
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-Content-Length: 146
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-X-Testing: Test
-
-{"jsonrpc":"2.0","id":2,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-Content-Length: 10
-Content-Length: 146
-
-{"jsonrpc":"2.0","id":3,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-# Test message with duplicate Content-Length headers
-#
-#      CHECK:  "id": 3,
-# CHECK-NEXT:  "jsonrpc": "2.0",
-# CHECK-NEXT:  "result": {
-# CHECK-NEXT:    "isIncomplete": false,
-# CHECK-NEXT:    "items": [
-#      CHECK:        "filterText": "a",
-# CHECK-NEXT:        "insertText": "a",
-# CHECK-NEXT:        "insertTextFormat": 1,
-# CHECK-NEXT:        "kind": 5,
-# CHECK-NEXT:        "label": " a",
-# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
-# CHECK-NEXT:        "sortText": "{{.*}}"
-#      CHECK:    ]
-# CHECK-NEXT:  }
-# STDERR: Warning: Duplicate Content-Length header received. The previous value for this message (10) was ignored.
-
-Content-Type: application/vscode-jsonrpc; charset-utf-8
-Content-Length: 10
-
-{"jsonrpc":"2.0","id":4,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-# Test message with malformed Content-Length
-#
-# STDERR: JSON parse error
-# Ensure we recover by sending another (valid) message
-
-Content-Length: 146
-
-{"jsonrpc":"2.0","id":5,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-# Test message with Content-Type before Content-Length
-#
-#      CHECK:  "id": 5,
-# CHECK-NEXT:  "jsonrpc": "2.0",
-# CHECK-NEXT:  "result": {
-# CHECK-NEXT:    "isIncomplete": false,
-# CHECK-NEXT:    "items": [
-#      CHECK:        "filterText": "a",
-# CHECK-NEXT:        "insertText": "a",
-# CHECK-NEXT:        "insertTextFormat": 1,
-# CHECK-NEXT:        "kind": 5,
-# CHECK-NEXT:        "label": " a",
-# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
-# CHECK-NEXT:        "sortText": "{{.*}}"
-#      CHECK:    ]
-# CHECK-NEXT:  }
-Content-Length: 1024
-
-{"jsonrpc":"2.0","id":5,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
-# Test message which reads beyond the end of the stream.
-#
-# Ensure this is the last test in the file!
-# STDERR: Input was aborted. Read only {{[0-9]+}} bytes of expected {{[0-9]+}}.
-
+# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s | FileCheck -strict-whitespace %s
+# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s 2>&1 | FileCheck -check-prefix=STDERR %s
+# vim: fileformat=dos
+# It is absolutely vital that this file has CRLF line endings.
+#
+# Note that we invert the test because we intent to let clangd exit prematurely.
+#
+# Test protocol parsing
+Content-Length: 125
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
+# Test message with Content-Type after Content-Length
+#
+#      CHECK:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+#      CHECK:  }
+Content-Length: 246
+
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct fake { int a, bb, ccc; int f(int i, const float f) const; };\nint main() {\n  fake f;\n  f.\n}\n"}}}
+
+Content-Length: 104
+
+{"jsonrpc":"2.0","method":"textDocument/didChange","params":{"textDocument":{"uri":"test:///main.cpp"}}}
+
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+Content-Length: 146
+
+{"jsonrpc":"2.0","id":1,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+# Test message with Content-Type before Content-Length
+#
+#      CHECK:  "id": 1,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+# CHECK-NEXT:    "isIncomplete": false,
+# CHECK-NEXT:    "items": [
+#      CHECK:        "filterText": "a",
+# CHECK-NEXT:        "insertText": "a",
+# CHECK-NEXT:        "insertTextFormat": 1,
+# CHECK-NEXT:        "kind": 5,
+# CHECK-NEXT:        "label": " a",
+# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
+# CHECK-NEXT:        "sortText": "{{.*}}"
+#      CHECK:    ]
+# CHECK-NEXT:  }
+
+X-Test: Testing
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+Content-Length: 146
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+X-Testing: Test
+
+{"jsonrpc":"2.0","id":2,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+Content-Length: 10
+Content-Length: 146
+
+{"jsonrpc":"2.0","id":3,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+# Test message with duplicate Content-Length headers
+#
+#      CHECK:  "id": 3,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+# CHECK-NEXT:    "isIncomplete": false,
+# CHECK-NEXT:    "items": [
+#      CHECK:        "filterText": "a",
+# CHECK-NEXT:        "insertText": "a",
+# CHECK-NEXT:        "insertTextFormat": 1,
+# CHECK-NEXT:        "kind": 5,
+# CHECK-NEXT:        "label": " a",
+# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
+# CHECK-NEXT:        "sortText": "{{.*}}"
+#      CHECK:    ]
+# CHECK-NEXT:  }
+# STDERR: Warning: Duplicate Content-Length header received. The previous value for this message (10) was ignored.
+
+Content-Type: application/vscode-jsonrpc; charset-utf-8
+Content-Length: 10
+
+{"jsonrpc":"2.0","id":4,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+# Test message with malformed Content-Length
+#
+# STDERR: JSON parse error
+# Ensure we recover by sending another (valid) message
+
+Content-Length: 146
+
+{"jsonrpc":"2.0","id":5,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+# Test message with Content-Type before Content-Length
+#
+#      CHECK:  "id": 5,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+# CHECK-NEXT:    "isIncomplete": false,
+# CHECK-NEXT:    "items": [
+#      CHECK:        "filterText": "a",
+# CHECK-NEXT:        "insertText": "a",
+# CHECK-NEXT:        "insertTextFormat": 1,
+# CHECK-NEXT:        "kind": 5,
+# CHECK-NEXT:        "label": " a",
+# CHECK-NEXT:        "score": {{[0-9]+.[0-9]+}},
+# CHECK-NEXT:        "sortText": "{{.*}}"
+#      CHECK:    ]
+# CHECK-NEXT:  }
+Content-Length: 1024
+
+{"jsonrpc":"2.0","id":5,"method":"textDocument/completion","params":{"textDocument":{"uri":"test:/main.cpp"},"position":{"line":3,"character":5}}}
+# Test message which reads beyond the end of the stream.
+#
+# Ensure this is the last test in the file!
+# STDERR: Input was aborted. Read only {{[0-9]+}} bytes of expected {{[0-9]+}}.
+
diff --git a/clang-tools-extra/clangd/test/too_large.test b/clang-tools-extra/clangd/test/too_large.test
index 6986bd5e258e..7df981e79420 100644
--- a/clang-tools-extra/clangd/test/too_large.test
+++ b/clang-tools-extra/clangd/test/too_large.test
@@ -1,7 +1,7 @@
-# RUN: not clangd -sync < %s 2>&1 | FileCheck -check-prefix=STDERR %s
-# vim: fileformat=dos
-# It is absolutely vital that this file has CRLF line endings.
-#
-Content-Length: 2147483648
-
-# STDERR: Refusing to read message
+# RUN: not clangd -sync < %s 2>&1 | FileCheck -check-prefix=STDERR %s
+# vim: fileformat=dos
+# It is absolutely vital that this file has CRLF line endings.
+#
+Content-Length: 2147483648
+
+# STDERR: Refusing to read message
diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 385c32edbae0..582ed7c65f58 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -25,10 +25,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
+#include <new>
 #include <optional>
 #include <utility>
 
@@ -326,29 +328,49 @@ struct LazyOffsetPtr {
   ///
   /// If the low bit is clear, a pointer to the AST node. If the low
   /// bit is set, the upper 63 bits are the offset.
-  mutable uint64_t Ptr = 0;
+  static constexpr size_t DataSize = std::max(sizeof(uint64_t), sizeof(T *));
+  alignas(uint64_t) alignas(T *) mutable unsigned char Data[DataSize] = {};
+
+  unsigned char GetLSB() const {
+    return Data[llvm::sys::IsBigEndianHost ? DataSize - 1 : 0];
+  }
+
+  template <typename U> U &As(bool New) const {
+    unsigned char *Obj =
+        Data + (llvm::sys::IsBigEndianHost ? DataSize - sizeof(U) : 0);
+    if (New)
+      return *new (Obj) U;
+    return *std::launder(reinterpret_cast<U *>(Obj));
+  }
+
+  T *&GetPtr() const { return As<T *>(false); }
+  uint64_t &GetU64() const { return As<uint64_t>(false); }
+  void SetPtr(T *Ptr) const { As<T *>(true) = Ptr; }
+  void SetU64(uint64_t U64) const { As<uint64_t>(true) = U64; }
 
 public:
   LazyOffsetPtr() = default;
-  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+  explicit LazyOffsetPtr(T *Ptr) : Data() { SetPtr(Ptr); }
 
-  explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
+  explicit LazyOffsetPtr(uint64_t Offset) : Data() {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
+    else
+      SetU64((Offset << 1) | 0x01);
   }
 
   LazyOffsetPtr &operator=(T *Ptr) {
-    this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+    SetPtr(Ptr);
     return *this;
   }
 
   LazyOffsetPtr &operator=(uint64_t Offset) {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
     else
-      Ptr = (Offset << 1) | 0x01;
+      SetU64((Offset << 1) | 0x01);
 
     return *this;
   }
@@ -356,15 +378,15 @@ public:
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  explicit operator bool() const { return Ptr != 0; }
+  explicit operator bool() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  bool isValid() const { return Ptr != 0; }
+  bool isValid() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is currently stored as an offset.
-  bool isOffset() const { return Ptr & 0x01; }
+  bool isOffset() const { return GetLSB() & 0x01; }
 
   /// Retrieve the pointer to the AST node that this lazy pointer points to.
   ///
@@ -375,9 +397,9 @@ public:
     if (isOffset()) {
       assert(Source &&
              "Cannot deserialize a lazy pointer without an AST source");
-      Ptr = reinterpret_cast<uint64_t>((Source->*Get)(OffsT(Ptr >> 1)));
+      SetPtr((Source->*Get)(OffsT(GetU64() >> 1)));
     }
-    return reinterpret_cast<T*>(Ptr);
+    return GetPtr();
   }
 
   /// Retrieve the address of the AST node pointer. Deserializes the pointee if
@@ -385,7 +407,7 @@ public:
   T **getAddressOfPointer(ExternalASTSource *Source) const {
     // Ensure the integer is in pointer form.
     (void)get(Source);
-    return reinterpret_cast<T**>(&Ptr);
+    return &GetPtr();
   }
 };
 
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index b368c4737567..e9d2e3fe6d5c 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -634,8 +634,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   std::vector<StringRef> Features;
   getNVPTXTargetFeatures(C.getDriver(), getToolChain().getTriple(), Args,
                          Features);
-  for (StringRef Feature : Features)
-    CmdArgs.append({"--feature", Args.MakeArgString(Feature)});
+  CmdArgs.push_back(
+      Args.MakeArgString("--plugin-opt=mattr=" + llvm::join(Features, ",")));
 
   // Add paths for the default clang library path.
   SmallString<256> DefaultLibPath =
diff --git a/clang/test/.gitattributes b/clang/test/.gitattributes
deleted file mode 100644
index 160fc6cf5617..000000000000
--- a/clang/test/.gitattributes
+++ /dev/null
@@ -1,4 +0,0 @@
-FixIt/fixit-newline-style.c text eol=crlf
-Frontend/system-header-line-directive-ms-lineendings.c text eol=crlf
-Frontend/rewrite-includes-mixed-eol-crlf.* text eol=crlf
-clang/test/Frontend/rewrite-includes-mixed-eol-lf.h text eolf=lf
diff --git a/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
index 9c1630f6f570..030fcfc31691 100644
--- a/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
+++ b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
@@ -1,64 +1,64 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s
-
-
-// This test tests two different AST generations. The "EMPTY" test mode verifies
-// the AST generated by forward declaration of the HLSL types which happens on
-// initializing the HLSL external AST with an AST Context.
-
-// The non-empty mode has a use that requires the StructuredBuffer type be complete,
-// which results in the AST being populated by the external AST source. That
-// case covers the full implementation of the template declaration and the
-// instantiated specialization.
-
-// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
-// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
-// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class StructuredBuffer
-// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
-
-// There should be no more occurrances of StructuredBuffer
-// EMPTY-NOT: StructuredBuffer
-
-#ifndef EMPTY
-
-StructuredBuffer<float> Buffer;
-
-#endif
-
-// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
-// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
-// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class StructuredBuffer definition
-
-// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
-// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
-// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
-
-// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
-// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
-// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
-// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
-// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue .e 0x{{[0-9A-Fa-f]+}}
-// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const StructuredBuffer<element_type>' lvalue implicit this
-// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
-
-// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'
-// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
-// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
-// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
-// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue .e 0x{{[0-9A-Fa-f]+}}
-// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'StructuredBuffer<element_type>' lvalue implicit this
-// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
-
-// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class StructuredBuffer definition
-
-// CHECK: TemplateArgument type 'float'
-// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float'
-// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
-// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
-// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s
+
+
+// This test tests two different AST generations. The "EMPTY" test mode verifies
+// the AST generated by forward declaration of the HLSL types which happens on
+// initializing the HLSL external AST with an AST Context.
+
+// The non-empty mode has a use that requires the StructuredBuffer type be complete,
+// which results in the AST being populated by the external AST source. That
+// case covers the full implementation of the template declaration and the
+// instantiated specialization.
+
+// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
+// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
+// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class StructuredBuffer
+// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+
+// There should be no more occurrances of StructuredBuffer
+// EMPTY-NOT: StructuredBuffer
+
+#ifndef EMPTY
+
+StructuredBuffer<float> Buffer;
+
+#endif
+
+// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit StructuredBuffer
+// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
+// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class StructuredBuffer definition
+
+// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
+// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
+
+// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
+// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue .e 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const StructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
+// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'
+// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type' lvalue .e 0x{{[0-9A-Fa-f]+}}
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'StructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
+// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class StructuredBuffer definition
+
+// CHECK: TemplateArgument type 'float'
+// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float'
+// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
+// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
+// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
diff --git a/clang/test/C/C2y/n3262.c b/clang/test/C/C2y/n3262.c
index 864ab351bdbc..3ff2062d88dd 100644
--- a/clang/test/C/C2y/n3262.c
+++ b/clang/test/C/C2y/n3262.c
@@ -1,20 +1,20 @@
-// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
-// expected-no-diagnostics
-
-/* WG14 N3262: Yes
- * Usability of a byte-wise copy of va_list
- *
- * NB: Clang explicitly documents this as being undefined behavior. A
- * diagnostic is produced for some targets but not for others for assignment or
- * initialization, but no diagnostic is possible to produce for use with memcpy
- * in the general case, nor with a manual bytewise copy via a for loop.
- *
- * Therefore, nothing is tested in this file; it serves as a reminder that we
- * validated our documentation against the paper. See
- * clang/docs/LanguageExtensions.rst for more details.
- *
- * FIXME: it would be nice to add ubsan support for recognizing when an invalid
- * copy is made and diagnosing on copy (or on use of the copied va_list).
- */
-
-int main() {}
+// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
+// expected-no-diagnostics
+
+/* WG14 N3262: Yes
+ * Usability of a byte-wise copy of va_list
+ *
+ * NB: Clang explicitly documents this as being undefined behavior. A
+ * diagnostic is produced for some targets but not for others for assignment or
+ * initialization, but no diagnostic is possible to produce for use with memcpy
+ * in the general case, nor with a manual bytewise copy via a for loop.
+ *
+ * Therefore, nothing is tested in this file; it serves as a reminder that we
+ * validated our documentation against the paper. See
+ * clang/docs/LanguageExtensions.rst for more details.
+ *
+ * FIXME: it would be nice to add ubsan support for recognizing when an invalid
+ * copy is made and diagnosing on copy (or on use of the copied va_list).
+ */
+
+int main() {}
diff --git a/clang/test/C/C2y/n3274.c b/clang/test/C/C2y/n3274.c
index 6bf8d72d0f33..ccdb89f4069d 100644
--- a/clang/test/C/C2y/n3274.c
+++ b/clang/test/C/C2y/n3274.c
@@ -1,18 +1,18 @@
-// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s
-// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
-
-/* WG14 N3274: Yes
- * Remove imaginary types
- */
-
-// Clang has never supported _Imaginary.
-#ifdef __STDC_IEC_559_COMPLEX__
-#error "When did this happen?"
-#endif
-
-_Imaginary float i; // expected-error {{imaginary types are not supported}}
-
-// _Imaginary is a keyword in older language modes, but doesn't need to be one
-// in C2y or later. However, to improve diagnostic behavior, we retain it as a
-// keyword in all language modes -- it is not available as an identifier.
-static_assert(!__is_identifier(_Imaginary));
+// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s
+// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
+
+/* WG14 N3274: Yes
+ * Remove imaginary types
+ */
+
+// Clang has never supported _Imaginary.
+#ifdef __STDC_IEC_559_COMPLEX__
+#error "When did this happen?"
+#endif
+
+_Imaginary float i; // expected-error {{imaginary types are not supported}}
+
+// _Imaginary is a keyword in older language modes, but doesn't need to be one
+// in C2y or later. However, to improve diagnostic behavior, we retain it as a
+// keyword in all language modes -- it is not available as an identifier.
+static_assert(!__is_identifier(_Imaginary));
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl
index 81c5837d8f20..4d3d4908c396 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl
@@ -1,22 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
-
-StructuredBuffer<float> Buffer1;
-StructuredBuffer<vector<float, 4> > BufferArray[4];
-
-StructuredBuffer<float> Buffer2 : register(u3);
-StructuredBuffer<vector<float, 4> > BufferArray2[4] : register(u4);
-
-StructuredBuffer<float> Buffer3 : register(u3, space1);
-StructuredBuffer<vector<float, 4> > BufferArray3[4] : register(u4, space1);
-
-[numthreads(1,1,1)]
-void main() {
-}
-
-// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]}
-// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0}
-// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0}
-// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0}
-// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0}
-// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1}
-// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+StructuredBuffer<float> Buffer1;
+StructuredBuffer<vector<float, 4> > BufferArray[4];
+
+StructuredBuffer<float> Buffer2 : register(u3);
+StructuredBuffer<vector<float, 4> > BufferArray2[4] : register(u4);
+
+StructuredBuffer<float> Buffer3 : register(u3, space1);
+StructuredBuffer<vector<float, 4> > BufferArray3[4] : register(u4, space1);
+
+[numthreads(1,1,1)]
+void main() {
+}
+
+// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]}
+// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0}
+// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0}
+// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0}
+// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0}
+// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1}
+// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1}
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl
index 435a904327a2..a99c7f98a1af 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl
@@ -1,70 +1,70 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s
-
-// NOTE: The number in type name and whether the struct is packed or not will mostly
-// likely change once subscript operators are properly implemented (llvm/llvm-project#95956) 
-// and theinterim field of the contained type is removed.
-
-// CHECK: %"class.hlsl::StructuredBuffer" = type <{ target("dx.RawBuffer", i16, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.0" = type <{ target("dx.RawBuffer", i16, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.3" = type { target("dx.RawBuffer", i32, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.5" = type { target("dx.RawBuffer", i64, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.6" = type <{ target("dx.RawBuffer", half, 1, 0) 
-// CHECK: %"class.hlsl::StructuredBuffer.8" = type { target("dx.RawBuffer", float, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.9" = type { target("dx.RawBuffer", double, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.10" = type { target("dx.RawBuffer", <4 x i16>, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.11" = type { target("dx.RawBuffer", <3 x i32>, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.12" = type { target("dx.RawBuffer", <2 x half>, 1, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.13" = type { target("dx.RawBuffer", <3 x float>, 1, 0)
-
-StructuredBuffer<int16_t> BufI16;
-StructuredBuffer<uint16_t> BufU16;
-StructuredBuffer<int> BufI32;
-StructuredBuffer<uint> BufU32;
-StructuredBuffer<int64_t> BufI64;
-StructuredBuffer<uint64_t> BufU64;
-StructuredBuffer<half> BufF16;
-StructuredBuffer<float> BufF32;
-StructuredBuffer<double> BufF64;
-StructuredBuffer< vector<int16_t, 4> > BufI16x4;
-StructuredBuffer< vector<uint, 3> > BufU32x3;
-StructuredBuffer<half2> BufF16x2;
-StructuredBuffer<float3> BufF32x3;
-// TODO: StructuredBuffer<snorm half> BufSNormF16; -> 11
-// TODO: StructuredBuffer<unorm half> BufUNormF16; -> 12
-// TODO: StructuredBuffer<snorm float> BufSNormF32; -> 13
-// TODO: StructuredBuffer<unorm float> BufUNormF32; -> 14
-// TODO: StructuredBuffer<snorm double> BufSNormF64; -> 15
-// TODO: StructuredBuffer<unorm double> BufUNormF64; -> 16
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
-  BufI16[GI] = 0;
-  BufU16[GI] = 0;
-  BufI32[GI] = 0;
-  BufU32[GI] = 0;
-  BufI64[GI] = 0;
-  BufU64[GI] = 0;
-  BufF16[GI] = 0;
-  BufF32[GI] = 0;
-  BufF64[GI] = 0;
-  BufI16x4[GI] = 0;
-  BufU32x3[GI] = 0;
-  BufF16x2[GI] = 0;
-  BufF32x3[GI] = 0;
-}
-
-// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2,
-// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3,
-// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4,
-// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5,
-// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6,
-// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7,
-// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8,
-// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9,
-// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10,
-// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2,
-// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5,
-// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8,
-// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9,
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s
+
+// NOTE: The number in type name and whether the struct is packed or not will mostly
+// likely change once subscript operators are properly implemented (llvm/llvm-project#95956) 
+// and theinterim field of the contained type is removed.
+
+// CHECK: %"class.hlsl::StructuredBuffer" = type <{ target("dx.RawBuffer", i16, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.0" = type <{ target("dx.RawBuffer", i16, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.3" = type { target("dx.RawBuffer", i32, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.5" = type { target("dx.RawBuffer", i64, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.6" = type <{ target("dx.RawBuffer", half, 1, 0) 
+// CHECK: %"class.hlsl::StructuredBuffer.8" = type { target("dx.RawBuffer", float, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.9" = type { target("dx.RawBuffer", double, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.10" = type { target("dx.RawBuffer", <4 x i16>, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.11" = type { target("dx.RawBuffer", <3 x i32>, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.12" = type { target("dx.RawBuffer", <2 x half>, 1, 0)
+// CHECK: %"class.hlsl::StructuredBuffer.13" = type { target("dx.RawBuffer", <3 x float>, 1, 0)
+
+StructuredBuffer<int16_t> BufI16;
+StructuredBuffer<uint16_t> BufU16;
+StructuredBuffer<int> BufI32;
+StructuredBuffer<uint> BufU32;
+StructuredBuffer<int64_t> BufI64;
+StructuredBuffer<uint64_t> BufU64;
+StructuredBuffer<half> BufF16;
+StructuredBuffer<float> BufF32;
+StructuredBuffer<double> BufF64;
+StructuredBuffer< vector<int16_t, 4> > BufI16x4;
+StructuredBuffer< vector<uint, 3> > BufU32x3;
+StructuredBuffer<half2> BufF16x2;
+StructuredBuffer<float3> BufF32x3;
+// TODO: StructuredBuffer<snorm half> BufSNormF16; -> 11
+// TODO: StructuredBuffer<unorm half> BufUNormF16; -> 12
+// TODO: StructuredBuffer<snorm float> BufSNormF32; -> 13
+// TODO: StructuredBuffer<unorm float> BufUNormF32; -> 14
+// TODO: StructuredBuffer<snorm double> BufSNormF64; -> 15
+// TODO: StructuredBuffer<unorm double> BufUNormF64; -> 16
+
+[numthreads(1,1,1)]
+void main(int GI : SV_GroupIndex) {
+  BufI16[GI] = 0;
+  BufU16[GI] = 0;
+  BufI32[GI] = 0;
+  BufU32[GI] = 0;
+  BufI64[GI] = 0;
+  BufU64[GI] = 0;
+  BufF16[GI] = 0;
+  BufF32[GI] = 0;
+  BufF64[GI] = 0;
+  BufI16x4[GI] = 0;
+  BufU32x3[GI] = 0;
+  BufF16x2[GI] = 0;
+  BufF32x3[GI] = 0;
+}
+
+// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2,
+// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3,
+// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4,
+// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5,
+// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6,
+// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7,
+// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8,
+// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9,
+// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10,
+// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2,
+// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5,
+// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8,
+// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9,
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl
index 89bde9236288..155749ec4f94 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-subscript.hlsl
@@ -1,17 +1,17 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s
-
-StructuredBuffer<int> In;
-StructuredBuffer<int> Out;
-
-[numthreads(1,1,1)]
-void main(unsigned GI : SV_GroupIndex) {
-  Out[GI] = In[GI];
-}
-
-// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
-// and confusing to follow so the match here is pretty weak.
-
-// CHECK: define void @main()
-// Verify inlining leaves only calls to "llvm." intrinsics
-// CHECK-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
-// CHECK: ret void
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s
+
+StructuredBuffer<int> In;
+StructuredBuffer<int> Out;
+
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  Out[GI] = In[GI];
+}
+
+// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
+// and confusing to follow so the match here is pretty weak.
+
+// CHECK: define void @main()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// CHECK-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/builtins/atan2.hlsl b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
index ada269db2f00..40796052e608 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
@@ -1,59 +1,59 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// CHECK-LABEL: test_atan2_half
-// NATIVE_HALF: call half @llvm.atan2.f16
-// NO_HALF: call float @llvm.atan2.f32
-half test_atan2_half (half p0, half p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_half2
-// NATIVE_HALF: call <2 x half> @llvm.atan2.v2f16
-// NO_HALF: call <2 x float> @llvm.atan2.v2f32
-half2 test_atan2_half2 (half2 p0, half2 p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_half3
-// NATIVE_HALF: call <3 x half> @llvm.atan2.v3f16
-// NO_HALF: call <3 x float> @llvm.atan2.v3f32
-half3 test_atan2_half3 (half3 p0, half3 p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_half4
-// NATIVE_HALF: call <4 x half> @llvm.atan2.v4f16
-// NO_HALF: call <4 x float> @llvm.atan2.v4f32
-half4 test_atan2_half4 (half4 p0, half4 p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_float
-// CHECK: call float @llvm.atan2.f32
-float test_atan2_float (float p0, float p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_float2
-// CHECK: call <2 x float> @llvm.atan2.v2f32
-float2 test_atan2_float2 (float2 p0, float2 p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_float3
-// CHECK: call <3 x float> @llvm.atan2.v3f32
-float3 test_atan2_float3 (float3 p0, float3 p1) {
-  return atan2(p0, p1);
-}
-
-// CHECK-LABEL: test_atan2_float4
-// CHECK: call <4 x float> @llvm.atan2.v4f32
-float4 test_atan2_float4 (float4 p0, float4 p1) {
-  return atan2(p0, p1);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// CHECK-LABEL: test_atan2_half
+// NATIVE_HALF: call half @llvm.atan2.f16
+// NO_HALF: call float @llvm.atan2.f32
+half test_atan2_half (half p0, half p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half2
+// NATIVE_HALF: call <2 x half> @llvm.atan2.v2f16
+// NO_HALF: call <2 x float> @llvm.atan2.v2f32
+half2 test_atan2_half2 (half2 p0, half2 p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half3
+// NATIVE_HALF: call <3 x half> @llvm.atan2.v3f16
+// NO_HALF: call <3 x float> @llvm.atan2.v3f32
+half3 test_atan2_half3 (half3 p0, half3 p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_half4
+// NATIVE_HALF: call <4 x half> @llvm.atan2.v4f16
+// NO_HALF: call <4 x float> @llvm.atan2.v4f32
+half4 test_atan2_half4 (half4 p0, half4 p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float
+// CHECK: call float @llvm.atan2.f32
+float test_atan2_float (float p0, float p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float2
+// CHECK: call <2 x float> @llvm.atan2.v2f32
+float2 test_atan2_float2 (float2 p0, float2 p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float3
+// CHECK: call <3 x float> @llvm.atan2.v3f32
+float3 test_atan2_float3 (float3 p0, float3 p1) {
+  return atan2(p0, p1);
+}
+
+// CHECK-LABEL: test_atan2_float4
+// CHECK: call <4 x float> @llvm.atan2.v4f32
+float4 test_atan2_float4 (float4 p0, float4 p1) {
+  return atan2(p0, p1);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/cross.hlsl b/clang/test/CodeGenHLSL/builtins/cross.hlsl
index eba710c905bf..514e57d36b20 100644
--- a/clang/test/CodeGenHLSL/builtins/cross.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/cross.hlsl
@@ -1,37 +1,37 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-
-// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half>
-// NATIVE_HALF: ret <3 x half> %hlsl.cross
-// NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float>
-// NO_HALF: ret <3 x float> %hlsl.cross
-half3 test_cross_half3(half3 p0, half3 p1)
-{
-    return cross(p0, p1);
-}
-
-// CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.cross = call <3 x float> @llvm.[[TARGET]].cross.v3f32(
-// CHECK: ret <3 x float> %hlsl.cross
-float3 test_cross_float3(float3 p0, float3 p1)
-{
-    return cross(p0, p1);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half>
+// NATIVE_HALF: ret <3 x half> %hlsl.cross
+// NO_HALF: define [[FNATTRS]] <3 x float> @
+// NO_HALF: call <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float>
+// NO_HALF: ret <3 x float> %hlsl.cross
+half3 test_cross_half3(half3 p0, half3 p1)
+{
+    return cross(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %hlsl.cross = call <3 x float> @llvm.[[TARGET]].cross.v3f32(
+// CHECK: ret <3 x float> %hlsl.cross
+float3 test_cross_float3(float3 p0, float3 p1)
+{
+    return cross(p0, p1);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 9b0293c218a5..1c23b0df04df 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,73 +1,73 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
-  return length(p0);
-}
-
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
-  return length(p0);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.fabs.f16(half
+// NO_HALF: call float @llvm.fabs.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_length_half(half p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
+// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half2(half2 p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
+// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half3(half3 p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
+// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half4(half4 p0)
+{
+  return length(p0);
+}
+
+// CHECK: define noundef float @
+// CHECK: call float @llvm.fabs.f32(float
+// CHECK: ret float
+float test_length_float(float p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef float @
+// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
+// CHECK: ret float %hlsl.length
+float test_length_float2(float2 p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef float @
+// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
+// CHECK: ret float %hlsl.length
+float test_length_float3(float3 p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef float @
+// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
+// CHECK: ret float %hlsl.length
+float test_length_float4(float4 p0)
+{
+  return length(p0);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/normalize.hlsl b/clang/test/CodeGenHLSL/builtins/normalize.hlsl
index d14e7c70ce06..83ad607c14a6 100644
--- a/clang/test/CodeGenHLSL/builtins/normalize.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/normalize.hlsl
@@ -1,85 +1,85 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-
-// NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: call half @llvm.[[TARGET]].normalize.f16(half
-// NO_HALF: call float @llvm.[[TARGET]].normalize.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_normalize_half(half p0)
-{
-    return normalize(p0);
-}
-// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half>
-// NO_HALF: call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
-// NATIVE_HALF: ret <2 x half> %hlsl.normalize
-// NO_HALF: ret <2 x float> %hlsl.normalize
-half2 test_normalize_half2(half2 p0)
-{
-    return normalize(p0);
-}
-// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half>
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float>
-// NATIVE_HALF: ret <3 x half> %hlsl.normalize
-// NO_HALF: ret <3 x float> %hlsl.normalize
-half3 test_normalize_half3(half3 p0)
-{
-    return normalize(p0);
-}
-// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half>
-// NO_HALF: call <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float>
-// NATIVE_HALF: ret <4 x half> %hlsl.normalize
-// NO_HALF: ret <4 x float> %hlsl.normalize
-half4 test_normalize_half4(half4 p0)
-{
-    return normalize(p0);
-}
-
-// CHECK: define [[FNATTRS]] float @
-// CHECK: call float @llvm.[[TARGET]].normalize.f32(float
-// CHECK: ret float
-float test_normalize_float(float p0)
-{
-    return normalize(p0);
-}
-// CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.normalize = call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
-
-// CHECK: ret <2 x float> %hlsl.normalize
-float2 test_normalize_float2(float2 p0)
-{
-    return normalize(p0);
-}
-// CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.normalize = call <3 x float> @llvm.[[TARGET]].normalize.v3f32(
-// CHECK: ret <3 x float> %hlsl.normalize
-float3 test_normalize_float3(float3 p0)
-{
-    return normalize(p0);
-}
-// CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.normalize = call <4 x float> @llvm.[[TARGET]].normalize.v4f32(
-// CHECK: ret <4 x float> %hlsl.normalize
-float4 test_length_float4(float4 p0)
-{
-    return normalize(p0);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+
+// NATIVE_HALF: define [[FNATTRS]] half @
+// NATIVE_HALF: call half @llvm.[[TARGET]].normalize.f16(half
+// NO_HALF: call float @llvm.[[TARGET]].normalize.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_normalize_half(half p0)
+{
+    return normalize(p0);
+}
+// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half>
+// NO_HALF: call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.normalize
+// NO_HALF: ret <2 x float> %hlsl.normalize
+half2 test_normalize_half2(half2 p0)
+{
+    return normalize(p0);
+}
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half>
+// NO_HALF: call <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.normalize
+// NO_HALF: ret <3 x float> %hlsl.normalize
+half3 test_normalize_half3(half3 p0)
+{
+    return normalize(p0);
+}
+// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half>
+// NO_HALF: call <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.normalize
+// NO_HALF: ret <4 x float> %hlsl.normalize
+half4 test_normalize_half4(half4 p0)
+{
+    return normalize(p0);
+}
+
+// CHECK: define [[FNATTRS]] float @
+// CHECK: call float @llvm.[[TARGET]].normalize.f32(float
+// CHECK: ret float
+float test_normalize_float(float p0)
+{
+    return normalize(p0);
+}
+// CHECK: define [[FNATTRS]] <2 x float> @
+// CHECK: %hlsl.normalize = call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
+
+// CHECK: ret <2 x float> %hlsl.normalize
+float2 test_normalize_float2(float2 p0)
+{
+    return normalize(p0);
+}
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %hlsl.normalize = call <3 x float> @llvm.[[TARGET]].normalize.v3f32(
+// CHECK: ret <3 x float> %hlsl.normalize
+float3 test_normalize_float3(float3 p0)
+{
+    return normalize(p0);
+}
+// CHECK: define [[FNATTRS]] <4 x float> @
+// CHECK: %hlsl.normalize = call <4 x float> @llvm.[[TARGET]].normalize.v4f32(
+// CHECK: ret <4 x float> %hlsl.normalize
+float4 test_length_float4(float4 p0)
+{
+    return normalize(p0);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl
index 8ef52794a3be..442f4930ca57 100644
--- a/clang/test/CodeGenHLSL/builtins/step.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/step.hlsl
@@ -1,84 +1,84 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
-
-// NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half
-// NO_HALF: call float @llvm.[[TARGET]].step.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_step_half(half p0, half p1)
-{
-    return step(p0, p1);
-}
-// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
-// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
-// NATIVE_HALF: ret <2 x half> %hlsl.step
-// NO_HALF: ret <2 x float> %hlsl.step
-half2 test_step_half2(half2 p0, half2 p1)
-{
-    return step(p0, p1);
-}
-// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
-// NATIVE_HALF: ret <3 x half> %hlsl.step
-// NO_HALF: ret <3 x float> %hlsl.step
-half3 test_step_half3(half3 p0, half3 p1)
-{
-    return step(p0, p1);
-}
-// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
-// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
-// NATIVE_HALF: ret <4 x half> %hlsl.step
-// NO_HALF: ret <4 x float> %hlsl.step
-half4 test_step_half4(half4 p0, half4 p1)
-{
-    return step(p0, p1);
-}
-
-// CHECK: define [[FNATTRS]] float @
-// CHECK: call float @llvm.[[TARGET]].step.f32(float
-// CHECK: ret float
-float test_step_float(float p0, float p1)
-{
-    return step(p0, p1);
-}
-// CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32(
-// CHECK: ret <2 x float> %hlsl.step
-float2 test_step_float2(float2 p0, float2 p1)
-{
-    return step(p0, p1);
-}
-// CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32(
-// CHECK: ret <3 x float> %hlsl.step
-float3 test_step_float3(float3 p0, float3 p1)
-{
-    return step(p0, p1);
-}
-// CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32(
-// CHECK: ret <4 x float> %hlsl.step
-float4 test_step_float4(float4 p0, float4 p1)
-{
-    return step(p0, p1);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+
+// NATIVE_HALF: define [[FNATTRS]] half @
+// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half
+// NO_HALF: call float @llvm.[[TARGET]].step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+    return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
+// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.step
+// NO_HALF: ret <2 x float> %hlsl.step
+half2 test_step_half2(half2 p0, half2 p1)
+{
+    return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
+// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.step
+// NO_HALF: ret <3 x float> %hlsl.step
+half3 test_step_half3(half3 p0, half3 p1)
+{
+    return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
+// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.step
+// NO_HALF: ret <4 x float> %hlsl.step
+half4 test_step_half4(half4 p0, half4 p1)
+{
+    return step(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] float @
+// CHECK: call float @llvm.[[TARGET]].step.f32(float
+// CHECK: ret float
+float test_step_float(float p0, float p1)
+{
+    return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <2 x float> @
+// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32(
+// CHECK: ret <2 x float> %hlsl.step
+float2 test_step_float2(float2 p0, float2 p1)
+{
+    return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32(
+// CHECK: ret <3 x float> %hlsl.step
+float3 test_step_float3(float3 p0, float3 p1)
+{
+    return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <4 x float> @
+// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32(
+// CHECK: ret <4 x float> %hlsl.step
+float4 test_step_float4(float4 p0, float4 p1)
+{
+    return step(p0, p1);
+}
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
index 5f24e7a5accb..54c291fac66f 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -104,4 +104,4 @@
 // RUN: %clang -target nvptx64-nvidia-cuda --cuda-feature=+ptx63 -march=sm_52 -### %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=FEATURE %s
 
-// FEATURE: clang-nvlink-wrapper{{.*}}"--feature" "+ptx63"
+// FEATURE: clang-nvlink-wrapper{{.*}}"--plugin-opt=mattr=+ptx63"
diff --git a/clang/test/Driver/flang/msvc-link.f90 b/clang/test/Driver/flang/msvc-link.f90
index 3f7e162a9a61..463749510eb5 100644
--- a/clang/test/Driver/flang/msvc-link.f90
+++ b/clang/test/Driver/flang/msvc-link.f90
@@ -1,5 +1,5 @@
-! RUN: %clang --driver-mode=flang --target=x86_64-pc-windows-msvc -### %s -Ltest 2>&1 | FileCheck %s
-!
-! Test that user provided paths come before the Flang runtimes
-! CHECK: "-libpath:test"
-! CHECK: "-libpath:{{.*(\\|/)}}lib"
+! RUN: %clang --driver-mode=flang --target=x86_64-pc-windows-msvc -### %s -Ltest 2>&1 | FileCheck %s
+!
+! Test that user provided paths come before the Flang runtimes
+! CHECK: "-libpath:test"
+! CHECK: "-libpath:{{.*(\\|/)}}lib"
diff --git a/clang/test/FixIt/fixit-newline-style.c b/clang/test/FixIt/fixit-newline-style.c
index 2aac143d4d75..61e4df67e85b 100644
--- a/clang/test/FixIt/fixit-newline-style.c
+++ b/clang/test/FixIt/fixit-newline-style.c
@@ -1,11 +1,11 @@
-// RUN: %clang_cc1 -pedantic -Wunused-label -fno-diagnostics-show-line-numbers -x c %s 2>&1 | FileCheck %s -strict-whitespace
-
-// This file intentionally uses a CRLF newline style
-// CHECK: warning: unused label 'ddd'
-// CHECK-NEXT: {{^  ddd:}}
-// CHECK-NEXT: {{^  \^~~~$}}
-// CHECK-NOT: {{^  ;}}
-void f(void) {
-  ddd:
-  ;
-}
+// RUN: %clang_cc1 -pedantic -Wunused-label -fno-diagnostics-show-line-numbers -x c %s 2>&1 | FileCheck %s -strict-whitespace
+
+// This file intentionally uses a CRLF newline style
+// CHECK: warning: unused label 'ddd'
+// CHECK-NEXT: {{^  ddd:}}
+// CHECK-NEXT: {{^  \^~~~$}}
+// CHECK-NOT: {{^  ;}}
+void f(void) {
+  ddd:
+  ;
+}
diff --git a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c
index 2faeaba32292..d6724444c066 100644
--- a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c
+++ b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -E -frewrite-includes %s | %clang_cc1 -
-// expected-no-diagnostics
-// Note: This source file has CRLF line endings.
-// This test validates that -frewrite-includes translates the end of line (EOL)
-// form used in header files to the EOL form used in the the primary source
-// file when the files use different EOL forms.
-#include "rewrite-includes-mixed-eol-crlf.h"
-#include "rewrite-includes-mixed-eol-lf.h"
+// RUN: %clang_cc1 -E -frewrite-includes %s | %clang_cc1 -
+// expected-no-diagnostics
+// Note: This source file has CRLF line endings.
+// This test validates that -frewrite-includes translates the end of line (EOL)
+// form used in header files to the EOL form used in the the primary source
+// file when the files use different EOL forms.
+#include "rewrite-includes-mixed-eol-crlf.h"
+#include "rewrite-includes-mixed-eol-lf.h"
diff --git a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h
index baedc282296b..0439b88b75e2 100644
--- a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h
+++ b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h
@@ -1,11 +1,11 @@
-// Note: This header file has CRLF line endings.
-// The indentation in some of the conditional inclusion directives below is
-// intentional and is required for this test to function as a regression test
-// for GH59736.
-_Static_assert(__LINE__ == 5, "");
-#if 1
-_Static_assert(__LINE__ == 7, "");
-  #if 1
-  _Static_assert(__LINE__ == 9, "");
-  #endif
-#endif
+// Note: This header file has CRLF line endings.
+// The indentation in some of the conditional inclusion directives below is
+// intentional and is required for this test to function as a regression test
+// for GH59736.
+_Static_assert(__LINE__ == 5, "");
+#if 1
+_Static_assert(__LINE__ == 7, "");
+  #if 1
+  _Static_assert(__LINE__ == 9, "");
+  #endif
+#endif
diff --git a/clang/test/Frontend/system-header-line-directive-ms-lineendings.c b/clang/test/Frontend/system-header-line-directive-ms-lineendings.c
index dffdd5cf1959..92fc07f65e0d 100644
--- a/clang/test/Frontend/system-header-line-directive-ms-lineendings.c
+++ b/clang/test/Frontend/system-header-line-directive-ms-lineendings.c
@@ -1,21 +1,21 @@
-// RUN: %clang_cc1 %s -E -o - -I %S/Inputs -isystem %S/Inputs/SystemHeaderPrefix | FileCheck %s
-#include <noline.h>
-#include <line-directive-in-system.h>
-
-#include "line-directive.h"
-
-// This tests that the line numbers for the current file are correctly outputted
-// for the include-file-completed test case. This file should be CRLF.
-
-// CHECK: # 1 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
-// CHECK: # 1 "{{.*}}noline.h" 1 3
-// CHECK: foo(void);
-// CHECK: # 3 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
-// CHECK: # 1 "{{.*}}line-directive-in-system.h" 1 3
-//      The "3" below indicates that "foo.h" is considered a system header.
-// CHECK: # 1 "foo.h" 3
-// CHECK: foo(void);
-// CHECK: # 4 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
-// CHECK: # 1 "{{.*}}line-directive.h" 1
-// CHECK: # 10 "foo.h"{{$}}
-// CHECK: # 6 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
+// RUN: %clang_cc1 %s -E -o - -I %S/Inputs -isystem %S/Inputs/SystemHeaderPrefix | FileCheck %s
+#include <noline.h>
+#include <line-directive-in-system.h>
+
+#include "line-directive.h"
+
+// This tests that the line numbers for the current file are correctly outputted
+// for the include-file-completed test case. This file should be CRLF.
+
+// CHECK: # 1 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
+// CHECK: # 1 "{{.*}}noline.h" 1 3
+// CHECK: foo(void);
+// CHECK: # 3 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
+// CHECK: # 1 "{{.*}}line-directive-in-system.h" 1 3
+//      The "3" below indicates that "foo.h" is considered a system header.
+// CHECK: # 1 "foo.h" 3
+// CHECK: foo(void);
+// CHECK: # 4 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
+// CHECK: # 1 "{{.*}}line-directive.h" 1
+// CHECK: # 10 "foo.h"{{$}}
+// CHECK: # 6 "{{.*}}system-header-line-directive-ms-lineendings.c" 2
diff --git a/clang/test/ParserHLSL/bitfields.hlsl b/clang/test/ParserHLSL/bitfields.hlsl
index 57b6705babdc..307d1143a068 100644
--- a/clang/test/ParserHLSL/bitfields.hlsl
+++ b/clang/test/ParserHLSL/bitfields.hlsl
@@ -1,31 +1,31 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -x hlsl -o - %s | FileCheck %s
-
-
-struct MyBitFields {
-  // CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:9:3, col:25> col:16 referenced field1 'unsigned int'
-  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:25> 'int'
-  // CHECK:-value: Int 3
-  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:25> 'int' 3
-  unsigned int field1 : 3; // 3 bits for field1
-
-  // CHECK:FieldDecl 0x{{[0-9a-f]+}} <line:15:3, col:25> col:16 referenced field2 'unsigned int'
-  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:25> 'int'
-  // CHECK:-value: Int 4
-  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:25> 'int' 4
-  unsigned int field2 : 4; // 4 bits for field2
-  
-  // CHECK:FieldDecl 0x{{[0-9a-f]+}} <line:21:3, col:16> col:7 field3 'int'
-  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:16> 'int'
-  // CHECK:-value: Int 5
-  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:16> 'int' 5
-  int field3 : 5;          // 5 bits for field3 (signed)
-};
-
-
-
-[numthreads(1,1,1)]
-void main() {
-  MyBitFields m;
-  m.field1 = 4;
-  m.field2 = m.field1*2;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -x hlsl -o - %s | FileCheck %s
+
+
+struct MyBitFields {
+  // CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:9:3, col:25> col:16 referenced field1 'unsigned int'
+  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:25> 'int'
+  // CHECK:-value: Int 3
+  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:25> 'int' 3
+  unsigned int field1 : 3; // 3 bits for field1
+
+  // CHECK:FieldDecl 0x{{[0-9a-f]+}} <line:15:3, col:25> col:16 referenced field2 'unsigned int'
+  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:25> 'int'
+  // CHECK:-value: Int 4
+  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:25> 'int' 4
+  unsigned int field2 : 4; // 4 bits for field2
+  
+  // CHECK:FieldDecl 0x{{[0-9a-f]+}} <line:21:3, col:16> col:7 field3 'int'
+  // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} <col:16> 'int'
+  // CHECK:-value: Int 5
+  // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} <col:16> 'int' 5
+  int field3 : 5;          // 5 bits for field3 (signed)
+};
+
+
+
+[numthreads(1,1,1)]
+void main() {
+  MyBitFields m;
+  m.field1 = 4;
+  m.field2 = m.field1*2;
 }
 \ No newline at end of file
diff --git a/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl b/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl
index 5b228d039345..2eebc920388b 100644
--- a/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl
+++ b/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl
@@ -1,21 +1,21 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
-
-// tests that hlsl annotations are properly parsed when applied on field decls,
-// and that the annotation gets properly placed on the AST.
-
-struct Eg9{
-  // CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:1, col:8> col:8 implicit struct Eg9
-  // CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:10:3, col:16> col:16 referenced a 'unsigned int'
-  // CHECK: -HLSLSV_DispatchThreadIDAttr 0x{{[0-9a-f]+}} <col:20>
-  unsigned int a : SV_DispatchThreadID;
-};
-Eg9 e9;
-
-
-RWBuffer<int> In : register(u1);
-
-
-[numthreads(1,1,1)]
-void main() {
-  In[0] = e9.a;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
+
+// tests that hlsl annotations are properly parsed when applied on field decls,
+// and that the annotation gets properly placed on the AST.
+
+struct Eg9{
+  // CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:1, col:8> col:8 implicit struct Eg9
+  // CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:10:3, col:16> col:16 referenced a 'unsigned int'
+  // CHECK: -HLSLSV_DispatchThreadIDAttr 0x{{[0-9a-f]+}} <col:20>
+  unsigned int a : SV_DispatchThreadID;
+};
+Eg9 e9;
+
+
+RWBuffer<int> In : register(u1);
+
+
+[numthreads(1,1,1)]
+void main() {
+  In[0] = e9.a;
+}
diff --git a/clang/test/ParserHLSL/hlsl_contained_type_attr.hlsl b/clang/test/ParserHLSL/hlsl_contained_type_attr.hlsl
index 476ec39e14da..5a72aa242e58 100644
--- a/clang/test/ParserHLSL/hlsl_contained_type_attr.hlsl
+++ b/clang/test/ParserHLSL/hlsl_contained_type_attr.hlsl
@@ -1,25 +1,25 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -x hlsl -ast-dump -o - %s | FileCheck %s
-
-typedef vector<float, 4> float4;
-
-// CHECK: -TypeAliasDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, col:83>
-// CHECK: -HLSLAttributedResourceType 0x{{[0-9a-f]+}} '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]]
-using ResourceIntAliasT = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]];
-ResourceIntAliasT h1;
-
-// CHECK: -VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:82> col:82 h2 '__hlsl_resource_t 
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float4)]]
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float4)]] h2;
-
-// CHECK: ClassTemplateDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 6]]:1, line:[[# @LINE + 8]]:1> line:[[# @LINE + 6]]:30 S
-// CHECK: TemplateTypeParmDecl 0x{{[0-9a-f]+}} <col:11, col:20> col:20 referenced typename depth 0 index 0 T
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:23, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:30 struct S definition
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:79> col:79 h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(T)]]
-template <typename T> struct S {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(T)]] h;
-};
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -x hlsl -ast-dump -o - %s | FileCheck %s
+
+typedef vector<float, 4> float4;
+
+// CHECK: -TypeAliasDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, col:83>
+// CHECK: -HLSLAttributedResourceType 0x{{[0-9a-f]+}} '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]]
+using ResourceIntAliasT = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]];
+ResourceIntAliasT h1;
+
+// CHECK: -VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:82> col:82 h2 '__hlsl_resource_t 
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float4)]]
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float4)]] h2;
+
+// CHECK: ClassTemplateDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 6]]:1, line:[[# @LINE + 8]]:1> line:[[# @LINE + 6]]:30 S
+// CHECK: TemplateTypeParmDecl 0x{{[0-9a-f]+}} <col:11, col:20> col:20 referenced typename depth 0 index 0 T
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:23, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:30 struct S definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:79> col:79 h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(T)]]
+template <typename T> struct S {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(T)]] h;
+};
diff --git a/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl
index 673ff8693b83..b2d492d95945 100644
--- a/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl
+++ b/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl
@@ -1,28 +1,28 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -x hlsl -o - %s -verify
-
-typedef vector<float, 4> float4;
-
-// expected-error@+1{{'contained_type' attribute cannot be applied to a declaration}}
-[[hlsl::contained_type(float4)]] __hlsl_resource_t h1;
-
-// expected-error@+1{{'contained_type' attribute takes one argument}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type()]] h3;
-
-// expected-error@+1{{expected a type}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(0)]] h4;
-
-// expected-error@+1{{unknown type name 'a'}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(a)]] h5;
-
-// expected-error@+1{{expected a type}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type("b", c)]] h6;
-
-// expected-warning@+1{{attribute 'contained_type' is already applied}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] [[hlsl::contained_type(float)]] h7;
-
-// expected-warning@+1{{attribute 'contained_type' is already applied with different arguments}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] [[hlsl::contained_type(int)]] h8;
-
-// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-// expected-error@+1{{attribute 'contained_type' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-float [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] res5;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -x hlsl -o - %s -verify
+
+typedef vector<float, 4> float4;
+
+// expected-error@+1{{'contained_type' attribute cannot be applied to a declaration}}
+[[hlsl::contained_type(float4)]] __hlsl_resource_t h1;
+
+// expected-error@+1{{'contained_type' attribute takes one argument}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type()]] h3;
+
+// expected-error@+1{{expected a type}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(0)]] h4;
+
+// expected-error@+1{{unknown type name 'a'}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(a)]] h5;
+
+// expected-error@+1{{expected a type}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type("b", c)]] h6;
+
+// expected-warning@+1{{attribute 'contained_type' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] [[hlsl::contained_type(float)]] h7;
+
+// expected-warning@+1{{attribute 'contained_type' is already applied with different arguments}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] [[hlsl::contained_type(int)]] h8;
+
+// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+// expected-error@+1{{attribute 'contained_type' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+float [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] res5;
diff --git a/clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl b/clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
index 487dc3241303..836d129c8d00 100644
--- a/clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
+++ b/clang/test/ParserHLSL/hlsl_is_rov_attr.hlsl
@@ -1,22 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
-
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:68> col:68 h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
-struct MyBuffer {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] h;
-};
-
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:66> col:66 res '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
-__hlsl_resource_t [[hlsl::is_rov]] [[hlsl::resource_class(SRV)]] res;
-
-// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:6 f 'void ()
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:72> col:72 r '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
-// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
-void f() {
-  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] [[hlsl::is_rov]] r;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
+
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:68> col:68 h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
+struct MyBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] h;
+};
+
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:66> col:66 res '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
+__hlsl_resource_t [[hlsl::is_rov]] [[hlsl::resource_class(SRV)]] res;
+
+// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:6 f 'void ()
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:72> col:72 r '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
+void f() {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] [[hlsl::is_rov]] r;
+}
diff --git a/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
index 9bb64ea990e2..3b2c12e7a96c 100644
--- a/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
+++ b/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl
@@ -1,20 +1,20 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
-
-// expected-error@+1{{'is_rov' attribute cannot be applied to a declaration}}
-[[hlsl::is_rov]] __hlsl_resource_t res0;
-
-// expected-error@+1{{HLSL resource needs to have [[hlsl::resource_class()]] attribute}}
-__hlsl_resource_t [[hlsl::is_rov]] res1;
-
-// expected-error@+1{{'is_rov' attribute takes no arguments}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(3)]] res2;
-  
-// expected-error@+1{{use of undeclared identifier 'gibberish'}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(gibberish)]] res3;
-
-// expected-warning@+1{{attribute 'is_rov' is already applied}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] [[hlsl::is_rov]] res4;
-
-// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-// expected-error@+1{{attribute 'is_rov' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-float [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] res5;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
+
+// expected-error@+1{{'is_rov' attribute cannot be applied to a declaration}}
+[[hlsl::is_rov]] __hlsl_resource_t res0;
+
+// expected-error@+1{{HLSL resource needs to have [[hlsl::resource_class()]] attribute}}
+__hlsl_resource_t [[hlsl::is_rov]] res1;
+
+// expected-error@+1{{'is_rov' attribute takes no arguments}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(3)]] res2;
+  
+// expected-error@+1{{use of undeclared identifier 'gibberish'}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(gibberish)]] res3;
+
+// expected-warning@+1{{attribute 'is_rov' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] [[hlsl::is_rov]] res4;
+
+// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+// expected-error@+1{{attribute 'is_rov' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+float [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] res5;
diff --git a/clang/test/ParserHLSL/hlsl_raw_buffer_attr.hlsl b/clang/test/ParserHLSL/hlsl_raw_buffer_attr.hlsl
index e09ed5586c10..84c924eec24e 100644
--- a/clang/test/ParserHLSL/hlsl_raw_buffer_attr.hlsl
+++ b/clang/test/ParserHLSL/hlsl_raw_buffer_attr.hlsl
@@ -1,22 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
-
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:72> col:72 h1 '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
-struct MyBuffer {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] h1;
-};
-
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:70> col:70 h2 '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
-__hlsl_resource_t [[hlsl::raw_buffer]] [[hlsl::resource_class(SRV)]] h2;
-
-// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:6 f 'void ()
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:72> col:72 h3 '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
-void f() {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] h3;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
+
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:3, col:72> col:72 h1 '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+struct MyBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] h1;
+};
+
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, col:70> col:70 h2 '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+__hlsl_resource_t [[hlsl::raw_buffer]] [[hlsl::resource_class(SRV)]] h2;
+
+// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 4]]:1, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:6 f 'void ()
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:72> col:72 h3 '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+void f() {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] h3;
+}
diff --git a/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl
index a10aca4e96fc..77530cbf9e4d 100644
--- a/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl
+++ b/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl
@@ -1,17 +1,17 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
-
-// expected-error@+1{{'raw_buffer' attribute cannot be applied to a declaration}}
-[[hlsl::raw_buffer]] __hlsl_resource_t res0;
-
-// expected-error@+1{{'raw_buffer' attribute takes no arguments}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer(3)]] res2;
-  
-// expected-error@+1{{use of undeclared identifier 'gibberish'}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer(gibberish)]] res3;
-
-// expected-warning@+1{{attribute 'raw_buffer' is already applied}}
-__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::raw_buffer]] res4;
-
-// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-// expected-error@+1{{attribute 'raw_buffer' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-float [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] res5;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
+
+// expected-error@+1{{'raw_buffer' attribute cannot be applied to a declaration}}
+[[hlsl::raw_buffer]] __hlsl_resource_t res0;
+
+// expected-error@+1{{'raw_buffer' attribute takes no arguments}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer(3)]] res2;
+  
+// expected-error@+1{{use of undeclared identifier 'gibberish'}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer(gibberish)]] res3;
+
+// expected-warning@+1{{attribute 'raw_buffer' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::raw_buffer]] res4;
+
+// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+// expected-error@+1{{attribute 'raw_buffer' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+float [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] res5;
diff --git a/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl b/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
index 9fee9edddf61..fbada8b4b99f 100644
--- a/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
+++ b/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl
@@ -1,37 +1,37 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
-
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:3, col:51> col:51 h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-struct MyBuffer {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
-};
-
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 2]]:1, col:49> col:49 res '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
-__hlsl_resource_t [[hlsl::resource_class(SRV)]] res;
-
-// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, line:[[# @LINE + 5]]:1> line:[[# @LINE + 3]]:6 f 'void ()
-// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:55> col:55 r '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
-void f() {
-  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] r;
-}
-
-// CHECK: ClassTemplateDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 6]]:1, line:[[# @LINE + 8]]:1> line:[[# @LINE + 6]]:29 MyBuffer2
-// CHECK: TemplateTypeParmDecl 0x{{[0-9a-f]+}} <col:10, col:19> col:19 typename depth 0 index 0 T
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:29 struct MyBuffer2 definition
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:3, col:51> col:51 h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-template<typename T> struct MyBuffer2 {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
-};
-
-// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <line:[[# @LINE - 4]]:1, line:[[# @LINE - 2]]:1> line:[[# @LINE - 4]]:29 struct MyBuffer2 definition implicit_instantiation
-// CHECK: TemplateArgument type 'float'
-// CHECK: BuiltinType 0x{{[0-9a-f]+}} 'float'
-// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
-// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE - 7]]:3, col:51> col:51 h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-MyBuffer2<float> myBuffer2;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s
+
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} {{.*}} struct MyBuffer definition
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:3, col:51> col:51 h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+struct MyBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
+};
+
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 2]]:1, col:49> col:49 res '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] res;
+
+// CHECK: FunctionDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:1, line:[[# @LINE + 5]]:1> line:[[# @LINE + 3]]:6 f 'void ()
+// CHECK: VarDecl 0x{{[0-9a-f]+}} <col:3, col:55> col:55 r '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(Sampler)]]
+void f() {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] r;
+}
+
+// CHECK: ClassTemplateDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 6]]:1, line:[[# @LINE + 8]]:1> line:[[# @LINE + 6]]:29 MyBuffer2
+// CHECK: TemplateTypeParmDecl 0x{{[0-9a-f]+}} <col:10, col:19> col:19 typename depth 0 index 0 T
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, line:[[# @LINE + 6]]:1> line:[[# @LINE + 4]]:29 struct MyBuffer2 definition
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE + 3]]:3, col:51> col:51 h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+template<typename T> struct MyBuffer2 {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] h;
+};
+
+// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <line:[[# @LINE - 4]]:1, line:[[# @LINE - 2]]:1> line:[[# @LINE - 4]]:29 struct MyBuffer2 definition implicit_instantiation
+// CHECK: TemplateArgument type 'float'
+// CHECK: BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} <col:22, col:29> col:29 implicit struct MyBuffer2
+// CHECK: FieldDecl 0x{{[0-9a-f]+}} <line:[[# @LINE - 7]]:3, col:51> col:51 h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+MyBuffer2<float> myBuffer2;
diff --git a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
index a0a4da1dc2bf..63e39daff949 100644
--- a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
+++ b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl
@@ -1,22 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
-
-// expected-error@+1{{'resource_class' attribute cannot be applied to a declaration}}
-[[hlsl::resource_class(UAV)]] __hlsl_resource_t e0;
-
-// expected-error@+1{{'resource_class' attribute takes one argument}}
-__hlsl_resource_t [[hlsl::resource_class()]] e1;
-
-// expected-warning@+1{{ResourceClass attribute argument not supported: gibberish}}
-__hlsl_resource_t [[hlsl::resource_class(gibberish)]] e2;
-
-// expected-warning@+1{{attribute 'resource_class' is already applied with different arguments}}
-__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(UAV)]] e3;
-
-// expected-warning@+1{{attribute 'resource_class' is already applied}}
-__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(SRV)]] e4;
-
-// expected-error@+1{{'resource_class' attribute takes one argument}}
-__hlsl_resource_t [[hlsl::resource_class(SRV, "aa")]] e5;
-
-// expected-error@+1{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
-float [[hlsl::resource_class(UAV)]] e6;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -o - %s -verify
+
+// expected-error@+1{{'resource_class' attribute cannot be applied to a declaration}}
+[[hlsl::resource_class(UAV)]] __hlsl_resource_t e0;
+
+// expected-error@+1{{'resource_class' attribute takes one argument}}
+__hlsl_resource_t [[hlsl::resource_class()]] e1;
+
+// expected-warning@+1{{ResourceClass attribute argument not supported: gibberish}}
+__hlsl_resource_t [[hlsl::resource_class(gibberish)]] e2;
+
+// expected-warning@+1{{attribute 'resource_class' is already applied with different arguments}}
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(UAV)]] e3;
+
+// expected-warning@+1{{attribute 'resource_class' is already applied}}
+__hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(SRV)]] e4;
+
+// expected-error@+1{{'resource_class' attribute takes one argument}}
+__hlsl_resource_t [[hlsl::resource_class(SRV, "aa")]] e5;
+
+// expected-error@+1{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}}
+float [[hlsl::resource_class(UAV)]] e6;
diff --git a/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl b/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
index 8885e3923735..38d27bc21e4a 100644
--- a/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
+++ b/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl
@@ -1,21 +1,21 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s | FileCheck %s
-
-// CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition implicit_instantiation
-// CHECK: -TemplateArgument type 'float'
-// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
-// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
-// CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
-RWBuffer<float> Buffer1;
-
-// CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RasterizerOrderedBuffer definition implicit_instantiation
-// CHECK: -TemplateArgument type 'vector<float, 4>'
-// CHECK: `-ExtVectorType 0x{{[0-9a-f]+}} 'vector<float, 4>' 4
-// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
-// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
-// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]
-// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
-// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<float, 4>)]]
-// CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
-RasterizerOrderedBuffer<vector<float, 4> > BufferArray3[4];
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s | FileCheck %s
+
+// CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition implicit_instantiation
+// CHECK: -TemplateArgument type 'float'
+// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
+// CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
+RWBuffer<float> Buffer1;
+
+// CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> class RasterizerOrderedBuffer definition implicit_instantiation
+// CHECK: -TemplateArgument type 'vector<float, 4>'
+// CHECK: `-ExtVectorType 0x{{[0-9a-f]+}} 'vector<float, 4>' 4
+// CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float'
+// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]
+// CHECK-SAME{LITERAL}: [[hlsl::is_rov]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector<float, 4>)]]
+// CHECK: -HLSLResourceAttr 0x{{[0-9a-f]+}} <<invalid sloc>> Implicit TypedBuffer
+RasterizerOrderedBuffer<vector<float, 4> > BufferArray3[4];
diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
index f853abcd3379..3fe6834be2e0 100644
--- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c
+++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
@@ -1,65 +1,65 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve \
-// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sve.h>
-
-svfloat32_t test_asin_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_asin(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_acos_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_acos(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_atan_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_atan(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_atan2_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_atan2(v, v);
-  // expected-error@-1 {{1st argument must be a floating point type}}
-}
-
-svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_sin(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_cos(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_tan(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_sinh_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_sinh(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_cosh_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_cosh(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_tanh_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_tanh(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve \
+// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+svfloat32_t test_asin_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_asin(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_acos_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_acos(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_atan_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_atan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_atan2_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_atan2(v, v);
+  // expected-error@-1 {{1st argument must be a floating point type}}
+}
+
+svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_sin(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_cos(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_sinh_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_sinh(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_cosh_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_cosh(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_tanh_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_tanh(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
index 006c136f8033..0aed1b2a0998 100644
--- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c
+++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
@@ -1,67 +1,67 @@
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +v -target-feature +zfh -target-feature +zvfh \
-// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
-// REQUIRES: riscv-registered-target
-
-#include <riscv_vector.h>
-
-vfloat32mf2_t test_asin_vv_i8mf8(vfloat32mf2_t v) {
-
-    return __builtin_elementwise_asin(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-  
-  vfloat32mf2_t test_acos_vv_i8mf8(vfloat32mf2_t v) {
-  
-    return __builtin_elementwise_acos(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-  
-  vfloat32mf2_t test_atan_vv_i8mf8(vfloat32mf2_t v) {
-  
-    return __builtin_elementwise_atan(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-
-vfloat32mf2_t test_atan2_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_atan2(v, v);
-  // expected-error@-1 {{1st argument must be a floating point type}}
-}
-
-vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_sin(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_cos(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_tan(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-vfloat32mf2_t test_sinh_vv_i8mf8(vfloat32mf2_t v) {
-
-    return __builtin_elementwise_sinh(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-  
-  vfloat32mf2_t test_cosh_vv_i8mf8(vfloat32mf2_t v) {
-  
-    return __builtin_elementwise_cosh(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-  
-  vfloat32mf2_t test_tanh_vv_i8mf8(vfloat32mf2_t v) {
-  
-    return __builtin_elementwise_tanh(v);
-    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-  }
-  
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
+// RUN:   -target-feature +v -target-feature +zfh -target-feature +zvfh \
+// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+vfloat32mf2_t test_asin_vv_i8mf8(vfloat32mf2_t v) {
+
+    return __builtin_elementwise_asin(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+  
+  vfloat32mf2_t test_acos_vv_i8mf8(vfloat32mf2_t v) {
+  
+    return __builtin_elementwise_acos(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+  
+  vfloat32mf2_t test_atan_vv_i8mf8(vfloat32mf2_t v) {
+  
+    return __builtin_elementwise_atan(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+
+vfloat32mf2_t test_atan2_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_atan2(v, v);
+  // expected-error@-1 {{1st argument must be a floating point type}}
+}
+
+vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_sin(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_cos(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+vfloat32mf2_t test_sinh_vv_i8mf8(vfloat32mf2_t v) {
+
+    return __builtin_elementwise_sinh(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+  
+  vfloat32mf2_t test_cosh_vv_i8mf8(vfloat32mf2_t v) {
+  
+    return __builtin_elementwise_cosh(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+  
+  vfloat32mf2_t test_tanh_vv_i8mf8(vfloat32mf2_t v) {
+  
+    return __builtin_elementwise_tanh(v);
+    // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+  }
+  
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl
index b60fba62bdb0..764b9e843f7f 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl
@@ -1,119 +1,119 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
-// RUN: -fsyntax-only -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-float also_alive(float f) {
-  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-  return 0;
-}
-
-float alive(float f) {
-  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-  // expected-error@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return 0;
-}
-
-float dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x);
-}
-
-float test(float x) {
-  return aliveTemp2(x);
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-    return 0;
-  }
-};
-
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);
-  float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  return a * b * c;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
+// RUN: -fsyntax-only -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+float also_alive(float f) {
+  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+  return 0;
+}
+
+float alive(float f) {
+  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+  // expected-error@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return 0;
+}
+
+float dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x);
+}
+
+float test(float x) {
+  return aliveTemp2(x);
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+    return 0;
+  }
+};
+
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);
+  float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  return a * b * c;
+}
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl
index 35b7c384f26c..6bfc8577670c 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl
@@ -1,180 +1,180 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
-// RUN: -fsyntax-only -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-float also_alive(float f) {
-  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  
-  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-
-  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-
-  return 0;
-}
-
-float alive(float f) {
-  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-
-  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-
-  // expected-error@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return 0;
-}
-
-float dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x);
-}
-
-float test(float x) {
-  return aliveTemp2(x);
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-    return 0;
-  }
-};
-
-// Exported function without body, not used
-export void exportedFunctionUnused(float f);
-
-// Exported function with body, without export, not used
-void exportedFunctionUnused(float f) {
-  // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUnused_fx_call
-
-  // API with shader-stage-specific availability in unused exported library function
-  // - no errors expected because the actual shader stage this function
-  // will be used in not known at this time
-  float B = fy(f);
-  float C = fz(f);
-}
-
-// Exported function with body - called from main() which is a compute shader entry point
-export void exportedFunctionUsed(float f) {
-  // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUsed_fx_call
-
-  // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #exportedFunctionUsed_fy_call
-
-  // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #exportedFunctionUsed_fz_call
-}
-
-namespace A {
-  namespace B {
-    export {
-      void exportedFunctionInNS(float x) {
-        // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-        // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-        float A = fx(x); // #exportedFunctionInNS_fx_call
-
-        // API with shader-stage-specific availability in exported library function
-        // - no errors expected because the actual shader stage this function
-        // will be used in not known at this time
-        float B = fy(x);
-        float C = fz(x);
-      }
-    }
-  }
-}
-
-// Shader entry point without body
-[shader("compute")]
-[numthreads(4,1,1)]
-float main();
-
-// Shader entry point with body
-[shader("compute")]
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);
-  float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  exportedFunctionUsed(1.0f);
-  return a * b * c;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
+// RUN: -fsyntax-only -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+float also_alive(float f) {
+  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  
+  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+
+  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+
+  return 0;
+}
+
+float alive(float f) {
+  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+
+  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+
+  // expected-error@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return 0;
+}
+
+float dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x);
+}
+
+float test(float x) {
+  return aliveTemp2(x);
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+    return 0;
+  }
+};
+
+// Exported function without body, not used
+export void exportedFunctionUnused(float f);
+
+// Exported function with body, without export, not used
+void exportedFunctionUnused(float f) {
+  // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUnused_fx_call
+
+  // API with shader-stage-specific availability in unused exported library function
+  // - no errors expected because the actual shader stage this function
+  // will be used in not known at this time
+  float B = fy(f);
+  float C = fz(f);
+}
+
+// Exported function with body - called from main() which is a compute shader entry point
+export void exportedFunctionUsed(float f) {
+  // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUsed_fx_call
+
+  // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #exportedFunctionUsed_fy_call
+
+  // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #exportedFunctionUsed_fz_call
+}
+
+namespace A {
+  namespace B {
+    export {
+      void exportedFunctionInNS(float x) {
+        // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+        // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+        float A = fx(x); // #exportedFunctionInNS_fx_call
+
+        // API with shader-stage-specific availability in exported library function
+        // - no errors expected because the actual shader stage this function
+        // will be used in not known at this time
+        float B = fy(x);
+        float C = fz(x);
+      }
+    }
+  }
+}
+
+// Shader entry point without body
+[shader("compute")]
+[numthreads(4,1,1)]
+float main();
+
+// Shader entry point with body
+[shader("compute")]
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);
+  float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  exportedFunctionUsed(1.0f);
+  return a * b * c;
+}
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl
index 406879838393..65836c55821d 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl
@@ -1,119 +1,119 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
-// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-float also_alive(float f) {
-  // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-  // expected-warning@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-  return 0;
-}
-
-float alive(float f) {
-  // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-  // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-  // expected-warning@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return 0;
-}
-
-float dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x);
-}
-
-float test(float x) {
-  return aliveTemp2(x);
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-    return 0;
-  }
-};
-
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);
-  float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  return a * b * c;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
+// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+float also_alive(float f) {
+  // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+  // expected-warning@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+  return 0;
+}
+
+float alive(float f) {
+  // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+  // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+  // expected-warning@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return 0;
+}
+
+float dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x);
+}
+
+float test(float x) {
+  return aliveTemp2(x);
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+    return 0;
+  }
+};
+
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);
+  float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  return a * b * c;
+}
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl
index a23e91a546b1..4c9783138f67 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl
@@ -1,162 +1,162 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
-// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-float also_alive(float f) {
-  // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  
-  // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-
-  // expected-warning@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-
-  return 0;
-}
-
-float alive(float f) {
-  // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-
-  // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-
-  // expected-warning@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return 0;
-}
-
-float dead(float f) {
-  // unreachable code - no errors expected
-  float A = fx(f);
-  float B = fy(f);
-  float C = fz(f);
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x);
-}
-
-float test(float x) {
-  return aliveTemp2(x);
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-    return 0;
-  }
-};
-
-// Exported function without body, not used
-export void exportedFunctionUnused(float f);
-
-// Exported function with body, without export, not used
-void exportedFunctionUnused(float f) {
-  // expected-warning@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUnused_fx_call
-
-  // API with shader-stage-specific availability in unused exported library function
-  // - no errors expected because the actual shader stage this function
-  // will be used in not known at this time
-  float B = fy(f);
-  float C = fz(f);
-}
-
-// Exported function with body - called from main() which is a compute shader entry point
-export void exportedFunctionUsed(float f) {
-  // expected-warning@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUsed_fx_call
-
-  // expected-warning@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #exportedFunctionUsed_fy_call
-
-  // expected-warning@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #exportedFunctionUsed_fz_call
-}
-
-// Shader entry point without body
-[shader("compute")]
-[numthreads(4,1,1)]
-float main();
-
-// Shader entry point with body
-[shader("compute")]
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);
-  float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  exportedFunctionUsed(1.0f);
-  return a * b * c;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
+// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+float also_alive(float f) {
+  // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  
+  // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+
+  // expected-warning@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+
+  return 0;
+}
+
+float alive(float f) {
+  // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+
+  // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+
+  // expected-warning@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return 0;
+}
+
+float dead(float f) {
+  // unreachable code - no errors expected
+  float A = fx(f);
+  float B = fy(f);
+  float C = fz(f);
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x);
+}
+
+float test(float x) {
+  return aliveTemp2(x);
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+    return 0;
+  }
+};
+
+// Exported function without body, not used
+export void exportedFunctionUnused(float f);
+
+// Exported function with body, without export, not used
+void exportedFunctionUnused(float f) {
+  // expected-warning@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUnused_fx_call
+
+  // API with shader-stage-specific availability in unused exported library function
+  // - no errors expected because the actual shader stage this function
+  // will be used in not known at this time
+  float B = fy(f);
+  float C = fz(f);
+}
+
+// Exported function with body - called from main() which is a compute shader entry point
+export void exportedFunctionUsed(float f) {
+  // expected-warning@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUsed_fx_call
+
+  // expected-warning@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #exportedFunctionUsed_fy_call
+
+  // expected-warning@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #exportedFunctionUsed_fz_call
+}
+
+// Shader entry point without body
+[shader("compute")]
+[numthreads(4,1,1)]
+float main();
+
+// Shader entry point with body
+[shader("compute")]
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);
+  float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  exportedFunctionUsed(1.0f);
+  return a * b * c;
+}
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl
index a8783c10cbab..b67e10c9a901 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl
@@ -1,129 +1,129 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
-// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-float also_alive(float f) {
-  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-  return 0;
-}
-
-float alive(float f) {
-  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-  // expected-error@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_dead_fx_call
-  // expected-error@#also_dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_dead_fy_call
-  // expected-error@#also_dead_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_dead_fz_call
-  return 0;
-}
-
-float dead(float f) {
-  // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #dead_fx_call
-  // expected-error@#dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #dead_fy_call
-  // expected-error@#dead_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #dead_fz_call
-
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp<float>' requested here}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<half>' requested here}}
-}
-
-float test(float x) {
-  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<float>' requested here}}
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-  }
-};
-
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);
-  float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  return a * b * c;
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \
+// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+float also_alive(float f) {
+  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+  return 0;
+}
+
+float alive(float f) {
+  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+  // expected-error@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_dead_fx_call
+  // expected-error@#also_dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_dead_fy_call
+  // expected-error@#also_dead_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_dead_fz_call
+  return 0;
+}
+
+float dead(float f) {
+  // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #dead_fx_call
+  // expected-error@#dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #dead_fy_call
+  // expected-error@#dead_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #dead_fz_call
+
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp<float>' requested here}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<half>' requested here}}
+}
+
+float test(float x) {
+  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<float>' requested here}}
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+  }
+};
+
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);
+  float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  return a * b * c;
 }
 \ No newline at end of file
diff --git a/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl
index 0fffbc96dac1..c7be5afbc2d2 100644
--- a/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl
@@ -1,192 +1,192 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
-// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 6.6)))
-half fx(half);  // #fx_half
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
-float fz(float); // #fz
-
-// FIXME: all diagnostics marked as FUTURE will come alive when HLSL default
-// diagnostic mode is implemented in a future PR which will verify calls in
-// all functions that are reachable from the shader library entry points
-
-float also_alive(float f) {
-  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_alive_fx_call
-  
-  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #also_alive_fy_call
-
-  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #also_alive_fz_call
-
-  return 0;
-}
-
-float alive(float f) {
-  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #alive_fx_call
-
-  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #alive_fy_call
-
-  // expected-error@#alive_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #alive_fz_call
-
-  return also_alive(f);
-}
-
-float also_dead(float f) {
-  // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #also_dead_fx_call
-  
-  // Call to environment-specific function from an unreachable function 
-  // in a shader library - no diagnostic expected.
-  float B = fy(f); // #also_dead_fy_call
-
-  // Call to environment-specific function from an unreachable function 
-  // in a shader library - no diagnostic expected.
-  float C = fz(f); // #also_dead_fz_call
-  return 0;
-}
-
-float dead(float f) {
-  // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #dead_fx_call
-
-  // Call to environment-specific function from an unreachable function 
-  // in a shader library - no diagnostic expected.
-  float B = fy(f); // #dead_fy_call
-
-  // Call to environment-specific function from an unreachable function 
-  // in a shader library - no diagnostic expected.
-  float C = fz(f); // #dead_fz_call
-
-  return also_dead(f);
-}
-
-template<typename T>
-T aliveTemp(T f) {
-  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp<float>' requested here}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #aliveTemp_fx_call
-  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #aliveTemp_fy_call
-  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #aliveTemp_fz_call
-  return 0;
-}
-
-template<typename T> T aliveTemp2(T f) {
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
-  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
-  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  return fx(f); // #aliveTemp2_fx_call
-}
-
-half test(half x) {
-  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<half>' requested here}}
-}
-
-float test(float x) {
-  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<float>' requested here}}
-}
-
-class MyClass
-{
-  float F;
-  float makeF() {
-    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-    float A = fx(F); // #MyClass_makeF_fx_call
-    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float B = fy(F); // #MyClass_makeF_fy_call
-    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
-    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-    float C = fz(F); // #MyClass_makeF_fz_call
-  }
-};
-
-// Exported function without body, not used
-export void exportedFunctionUnused(float f);
-
-// Exported function with body, without export, not used
-void exportedFunctionUnused(float f) {
-  // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUnused_fx_call
-
-  // API with shader-stage-specific availability in unused exported library function
-  // - no errors expected because the actual shader stage this function
-  // will be used in not known at this time
-  float B = fy(f);
-  float C = fz(f);
-}
-
-// Exported function with body - called from main() which is a compute shader entry point
-export void exportedFunctionUsed(float f) {
-  // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #exportedFunctionUsed_fx_call
-
-  // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #exportedFunctionUsed_fy_call
-
-  // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float C = fz(f); // #exportedFunctionUsed_fz_call
-}
-
-namespace A {
-  namespace B {
-    export {
-      void exportedFunctionInNS(float x) {
-        // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-        // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-        float A = fx(x); // #exportedFunctionInNS_fx_call
-
-        // API with shader-stage-specific availability in exported library function
-        // - no errors expected because the actual shader stage this function
-        // will be used in not known at this time
-        float B = fy(x);
-        float C = fz(x);
-      }
-    }
-  }
-}
-
-[shader("compute")]
-[numthreads(4,1,1)]
-float main() {
-  float f = 3;
-  MyClass C = { 1.0f };
-  float a = alive(f);float b = aliveTemp<float>(f); // #aliveTemp_inst
-  float c = C.makeF();
-  float d = test((float)1.0);
-  float e = test((half)1.0);
-  exportedFunctionUsed(1.0f);
-  return a * b * c;
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
+// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 6.6)))
+half fx(half);  // #fx_half
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh)))
+float fz(float); // #fz
+
+// FIXME: all diagnostics marked as FUTURE will come alive when HLSL default
+// diagnostic mode is implemented in a future PR which will verify calls in
+// all functions that are reachable from the shader library entry points
+
+float also_alive(float f) {
+  // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_alive_fx_call
+  
+  // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #also_alive_fy_call
+
+  // expected-error@#also_alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #also_alive_fz_call
+
+  return 0;
+}
+
+float alive(float f) {
+  // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #alive_fx_call
+
+  // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #alive_fy_call
+
+  // expected-error@#alive_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #alive_fz_call
+
+  return also_alive(f);
+}
+
+float also_dead(float f) {
+  // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #also_dead_fx_call
+  
+  // Call to environment-specific function from an unreachable function 
+  // in a shader library - no diagnostic expected.
+  float B = fy(f); // #also_dead_fy_call
+
+  // Call to environment-specific function from an unreachable function 
+  // in a shader library - no diagnostic expected.
+  float C = fz(f); // #also_dead_fz_call
+  return 0;
+}
+
+float dead(float f) {
+  // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #dead_fx_call
+
+  // Call to environment-specific function from an unreachable function 
+  // in a shader library - no diagnostic expected.
+  float B = fy(f); // #dead_fy_call
+
+  // Call to environment-specific function from an unreachable function 
+  // in a shader library - no diagnostic expected.
+  float C = fz(f); // #dead_fz_call
+
+  return also_dead(f);
+}
+
+template<typename T>
+T aliveTemp(T f) {
+  // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp<float>' requested here}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #aliveTemp_fx_call
+  // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #aliveTemp_fy_call
+  // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #aliveTemp_fz_call
+  return 0;
+}
+
+template<typename T> T aliveTemp2(T f) {
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}}
+  // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}}
+  // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  return fx(f); // #aliveTemp2_fx_call
+}
+
+half test(half x) {
+  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<half>' requested here}}
+}
+
+float test(float x) {
+  return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2<float>' requested here}}
+}
+
+class MyClass
+{
+  float F;
+  float makeF() {
+    // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+    // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+    float A = fx(F); // #MyClass_makeF_fx_call
+    // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+    // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float B = fy(F); // #MyClass_makeF_fy_call
+    // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}}
+    // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+    float C = fz(F); // #MyClass_makeF_fz_call
+  }
+};
+
+// Exported function without body, not used
+export void exportedFunctionUnused(float f);
+
+// Exported function with body, without export, not used
+void exportedFunctionUnused(float f) {
+  // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUnused_fx_call
+
+  // API with shader-stage-specific availability in unused exported library function
+  // - no errors expected because the actual shader stage this function
+  // will be used in not known at this time
+  float B = fy(f);
+  float C = fz(f);
+}
+
+// Exported function with body - called from main() which is a compute shader entry point
+export void exportedFunctionUsed(float f) {
+  // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #exportedFunctionUsed_fx_call
+
+  // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #exportedFunctionUsed_fy_call
+
+  // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float C = fz(f); // #exportedFunctionUsed_fz_call
+}
+
+namespace A {
+  namespace B {
+    export {
+      void exportedFunctionInNS(float x) {
+        // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+        // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+        float A = fx(x); // #exportedFunctionInNS_fx_call
+
+        // API with shader-stage-specific availability in exported library function
+        // - no errors expected because the actual shader stage this function
+        // will be used in not known at this time
+        float B = fy(x);
+        float C = fz(x);
+      }
+    }
+  }
+}
+
+[shader("compute")]
+[numthreads(4,1,1)]
+float main() {
+  float f = 3;
+  MyClass C = { 1.0f };
+  float a = alive(f);float b = aliveTemp<float>(f); // #aliveTemp_inst
+  float c = C.makeF();
+  float d = test((float)1.0);
+  float e = test((half)1.0);
+  exportedFunctionUsed(1.0f);
+  return a * b * c;
+}
diff --git a/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl b/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl
index bfefc9b116a6..b56ab8fe4526 100644
--- a/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl
+++ b/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl
@@ -1,57 +1,57 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
-// RUN: -fsyntax-only -verify %s
-
-__attribute__((availability(shadermodel, introduced = 6.5)))
-float fx(float);  // #fx
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
-__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
-float fy(float); // #fy
-
-__attribute__((availability(shadermodel, introduced = 5.0, environment = compute)))
-float fz(float); // #fz
-
-
-void F(float f) {
-  // Make sure we only get this error once, even though this function is scanned twice - once
-  // in compute shader context and once in pixel shader context.
-  // expected-error@#fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
-  // expected-note@#fx {{fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
-  float A = fx(f); // #fx_call
-  
-  // expected-error@#fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
-  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
-  float B = fy(f); // #fy_call
-
-  // expected-error@#fz_call {{'fz' is unavailable}}
-  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 5.0 in compute environment here, but the deployment target is Shader Model 6.0 pixel environment}}
-  float X = fz(f); // #fz_call
-}
-
-void deadCode(float f) {
-  // no diagnostics expected under default diagnostic mode
-  float A = fx(f);
-  float B = fy(f);
-  float X = fz(f);
-}
-
-// Pixel shader
-[shader("pixel")]
-void mainPixel() {
-  F(1.0);
-}
-
-// First Compute shader
-[shader("compute")]
-[numthreads(4,1,1)]
-void mainCompute1() {
-  F(2.0);
-}
-
-// Second compute shader to make sure we do not get duplicate messages if F is called
-// from multiple entry points.
-[shader("compute")]
-[numthreads(4,1,1)]
-void mainCompute2() {
-  F(3.0);
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \
+// RUN: -fsyntax-only -verify %s
+
+__attribute__((availability(shadermodel, introduced = 6.5)))
+float fx(float);  // #fx
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel)))
+__attribute__((availability(shadermodel, introduced = 6.5, environment = compute)))
+float fy(float); // #fy
+
+__attribute__((availability(shadermodel, introduced = 5.0, environment = compute)))
+float fz(float); // #fz
+
+
+void F(float f) {
+  // Make sure we only get this error once, even though this function is scanned twice - once
+  // in compute shader context and once in pixel shader context.
+  // expected-error@#fx_call {{'fx' is only available on Shader Model 6.5 or newer}}
+  // expected-note@#fx {{fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}}
+  float A = fx(f); // #fx_call
+  
+  // expected-error@#fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}}
+  // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}}
+  float B = fy(f); // #fy_call
+
+  // expected-error@#fz_call {{'fz' is unavailable}}
+  // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 5.0 in compute environment here, but the deployment target is Shader Model 6.0 pixel environment}}
+  float X = fz(f); // #fz_call
+}
+
+void deadCode(float f) {
+  // no diagnostics expected under default diagnostic mode
+  float A = fx(f);
+  float B = fy(f);
+  float X = fz(f);
+}
+
+// Pixel shader
+[shader("pixel")]
+void mainPixel() {
+  F(1.0);
+}
+
+// First Compute shader
+[shader("compute")]
+[numthreads(4,1,1)]
+void mainCompute1() {
+  F(2.0);
+}
+
+// Second compute shader to make sure we do not get duplicate messages if F is called
+// from multiple entry points.
+[shader("compute")]
+[numthreads(4,1,1)]
+void mainCompute2() {
+  F(3.0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl
index 1ec56542113d..a472d5519dc5 100644
--- a/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl
@@ -1,19 +1,19 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -verify %s
-
-typedef vector<float, 3> float3;
-
-StructuredBuffer<float3> Buffer;
-
-// expected-error@+2 {{class template 'StructuredBuffer' requires template arguments}}
-// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
-StructuredBuffer BufferErr1;
-
-// expected-error@+2 {{too few template arguments for class template 'StructuredBuffer'}}
-// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
-StructuredBuffer<> BufferErr2;
-
-[numthreads(1,1,1)]
-void main() {
-  (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::StructuredBuffer<vector<float, 3>>'}}
-  // expected-note@* {{implicitly declared private here}}
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -fsyntax-only -verify %s
+
+typedef vector<float, 3> float3;
+
+StructuredBuffer<float3> Buffer;
+
+// expected-error@+2 {{class template 'StructuredBuffer' requires template arguments}}
+// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
+StructuredBuffer BufferErr1;
+
+// expected-error@+2 {{too few template arguments for class template 'StructuredBuffer'}}
+// expected-note@*:* {{template declaration from hidden source: template <class element_type> class StructuredBuffer}}
+StructuredBuffer<> BufferErr2;
+
+[numthreads(1,1,1)]
+void main() {
+  (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::StructuredBuffer<vector<float, 3>>'}}
+  // expected-note@* {{implicitly declared private here}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/cross-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/cross-errors.hlsl
index 354e7abb8a31..423f5bac9471 100644
--- a/clang/test/SemaHLSL/BuiltIns/cross-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/cross-errors.hlsl
@@ -1,43 +1,43 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify
-
-void test_too_few_arg()
-{
-  return __builtin_hlsl_cross();
-  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
-}
-
-void test_too_many_arg(float3 p0)
-{
-  return __builtin_hlsl_cross(p0, p0, p0);
-  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
-}
-
-bool builtin_bool_to_float_type_promotion(bool p1)
-{
-  return __builtin_hlsl_cross(p1, p1);
-  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
-}
-
-bool builtin_cross_int_to_float_promotion(int p1)
-{
-  return __builtin_hlsl_cross(p1, p1);
-  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
-}
-
-bool2 builtin_cross_int2_to_float2_promotion(int2 p1)
-{
-  return __builtin_hlsl_cross(p1, p1);
-  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
-}
-
-float2 builtin_cross_float2(float2 p1, float2 p2)
-{
-  return __builtin_hlsl_cross(p1, p2);
-  // expected-error@-1 {{too many elements in vector operand (expected 3 elements, have 2)}}
-}
-
-float3  builtin_cross_float3_int3(float3 p1, int3 p2)
-{
-  return __builtin_hlsl_cross(p1, p2);
-  // expected-error@-1 {{all arguments to '__builtin_hlsl_cross' must have the same type}}
-}
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify
+
+void test_too_few_arg()
+{
+  return __builtin_hlsl_cross();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float3 p0)
+{
+  return __builtin_hlsl_cross(p0, p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+  return __builtin_hlsl_cross(p1, p1);
+  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_cross_int_to_float_promotion(int p1)
+{
+  return __builtin_hlsl_cross(p1, p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_cross_int2_to_float2_promotion(int2 p1)
+{
+  return __builtin_hlsl_cross(p1, p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
+
+float2 builtin_cross_float2(float2 p1, float2 p2)
+{
+  return __builtin_hlsl_cross(p1, p2);
+  // expected-error@-1 {{too many elements in vector operand (expected 3 elements, have 2)}}
+}
+
+float3  builtin_cross_float3_int3(float3 p1, int3 p2)
+{
+  return __builtin_hlsl_cross(p1, p2);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_cross' must have the same type}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
index b876a8e84cb3..bfbd8b28257a 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors2.hlsl
@@ -1,13 +1,13 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_atan2
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_fmod
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_pow
-
-double test_double_builtin(double p0, double p1) {
-    return TEST_FUNC(p0, p1);
-  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
-}
-
-double2 test_vec_double_builtin(double2 p0, double2 p1) {
-    return TEST_FUNC(p0, p1);
-  // expected-error@-1 {{passing 'double2' (aka 'vector<double, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
-}
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_atan2
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_fmod
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_pow
+
+double test_double_builtin(double p0, double p1) {
+    return TEST_FUNC(p0, p1);
+  // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}}
+}
+
+double2 test_vec_double_builtin(double2 p0, double2 p1) {
+    return TEST_FUNC(p0, p1);
+  // expected-error@-1 {{passing 'double2' (aka 'vector<double, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index c5e2ac0b502d..281faada6f5e 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,32 +1,32 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
-
-void test_too_few_arg()
-{
-  return __builtin_hlsl_length();
-  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
-}
-
-void test_too_many_arg(float2 p0)
-{
-  return __builtin_hlsl_length(p0, p0);
-  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
-}
-
-bool builtin_bool_to_float_type_promotion(bool p1)
-{
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
-}
-
-bool builtin_length_int_to_float_promotion(int p1)
-{
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
-}
-
-bool2 builtin_length_int2_to_float2_promotion(int2 p1)
-{
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
-}
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+
+void test_too_few_arg()
+{
+  return __builtin_hlsl_length();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+  return __builtin_hlsl_length(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_length_int_to_float_promotion(int p1)
+{
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_length_int2_to_float2_promotion(int2 p1)
+{
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
index 3720dca9b88a..fc48c9b2589f 100644
--- a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
@@ -1,31 +1,31 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
-void test_too_few_arg()
-{
-  return __builtin_hlsl_normalize();
-  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
-}
-
-void test_too_many_arg(float2 p0)
-{
-  return __builtin_hlsl_normalize(p0, p0);
-  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
-}
-
-bool builtin_bool_to_float_type_promotion(bool p1)
-{
-  return __builtin_hlsl_normalize(p1);
-  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
-}
-
-bool builtin_normalize_int_to_float_promotion(int p1)
-{
-  return __builtin_hlsl_normalize(p1);
-  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
-}
-
-bool2 builtin_normalize_int2_to_float2_promotion(int2 p1)
-{
-  return __builtin_hlsl_normalize(p1);
-  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
-}
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+  return __builtin_hlsl_normalize();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+  return __builtin_hlsl_normalize(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+  return __builtin_hlsl_normalize(p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_normalize_int_to_float_promotion(int p1)
+{
+  return __builtin_hlsl_normalize(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_normalize_int2_to_float2_promotion(int2 p1)
+{
+  return __builtin_hlsl_normalize(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
index a76c5ff5dbd2..823585201ca6 100644
--- a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
@@ -1,31 +1,31 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
-void test_too_few_arg()
-{
-  return __builtin_hlsl_step();
-  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
-}
-
-void test_too_many_arg(float2 p0)
-{
-  return __builtin_hlsl_step(p0, p0, p0);
-  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
-}
-
-bool builtin_bool_to_float_type_promotion(bool p1)
-{
-  return __builtin_hlsl_step(p1, p1);
-  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
-}
-
-bool builtin_step_int_to_float_promotion(int p1)
-{
-  return __builtin_hlsl_step(p1, p1);
-  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
-}
-
-bool2 builtin_step_int2_to_float2_promotion(int2 p1)
-{
-  return __builtin_hlsl_step(p1, p1);
-  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
-}
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+  return __builtin_hlsl_step();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+  return __builtin_hlsl_step(p0, p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_step_int_to_float_promotion(int p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_step_int2_to_float2_promotion(int2 p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl b/clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl
index 1223a131af35..8c0f8d6f271d 100644
--- a/clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl
+++ b/clang/test/SemaHLSL/Types/Traits/IsIntangibleType.hlsl
@@ -1,81 +1,81 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s
-// expected-no-diagnostics
-
-_Static_assert(__builtin_hlsl_is_intangible(__hlsl_resource_t), "");
-// no need to check array of __hlsl_resource_t, arrays of sizeless types are not supported
-
-_Static_assert(!__builtin_hlsl_is_intangible(int), "");
-_Static_assert(!__builtin_hlsl_is_intangible(float3), "");
-_Static_assert(!__builtin_hlsl_is_intangible(half[4]), "");
-
-typedef __hlsl_resource_t Res;
-_Static_assert(__builtin_hlsl_is_intangible(const Res), "");
-// no need to check array of Res, arrays of sizeless types are not supported
-
-struct ABuffer {
-    const int i[10];
-    __hlsl_resource_t h;
-};
-_Static_assert(__builtin_hlsl_is_intangible(ABuffer), "");
-_Static_assert(__builtin_hlsl_is_intangible(ABuffer[10]), "");
-
-struct MyStruct {
-    half2 h2;
-    int3 i3;
-};
-_Static_assert(!__builtin_hlsl_is_intangible(MyStruct), "");
-_Static_assert(!__builtin_hlsl_is_intangible(MyStruct[10]), "");
-
-class MyClass {
-    int3 ivec;
-    float farray[12];
-    MyStruct ms;
-    ABuffer buf;
-};
-_Static_assert(__builtin_hlsl_is_intangible(MyClass), "");
-_Static_assert(__builtin_hlsl_is_intangible(MyClass[2]), "");
-
-union U {
-    double d[4];
-    Res buf;
-};
-_Static_assert(__builtin_hlsl_is_intangible(U), "");
-_Static_assert(__builtin_hlsl_is_intangible(U[100]), "");
-
-class MyClass2 {
-    int3 ivec;
-    float farray[12];
-    U u;
-};
-_Static_assert(__builtin_hlsl_is_intangible(MyClass2), "");
-_Static_assert(__builtin_hlsl_is_intangible(MyClass2[5]), "");
-
-class Simple {
-    int a;
-};
-
-template<typename T> struct TemplatedBuffer {
-    T a;
-    __hlsl_resource_t h;
-};
-_Static_assert(__builtin_hlsl_is_intangible(TemplatedBuffer<int>), "");
-
-struct MyStruct2 : TemplatedBuffer<float> {
-    float x;
-};
-_Static_assert(__builtin_hlsl_is_intangible(MyStruct2), "");
-
-struct MyStruct3 {
-    const TemplatedBuffer<float> TB[10];
-};
-_Static_assert(__builtin_hlsl_is_intangible(MyStruct3), "");
-
-template<typename T> struct SimpleTemplate {
-    T a;
-};
-_Static_assert(__builtin_hlsl_is_intangible(SimpleTemplate<__hlsl_resource_t>), "");
-_Static_assert(!__builtin_hlsl_is_intangible(SimpleTemplate<float>), "");
-
-_Static_assert(__builtin_hlsl_is_intangible(RWBuffer<float>), "");
-_Static_assert(__builtin_hlsl_is_intangible(StructuredBuffer<Simple>), "");
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s
+// expected-no-diagnostics
+
+_Static_assert(__builtin_hlsl_is_intangible(__hlsl_resource_t), "");
+// no need to check array of __hlsl_resource_t, arrays of sizeless types are not supported
+
+_Static_assert(!__builtin_hlsl_is_intangible(int), "");
+_Static_assert(!__builtin_hlsl_is_intangible(float3), "");
+_Static_assert(!__builtin_hlsl_is_intangible(half[4]), "");
+
+typedef __hlsl_resource_t Res;
+_Static_assert(__builtin_hlsl_is_intangible(const Res), "");
+// no need to check array of Res, arrays of sizeless types are not supported
+
+struct ABuffer {
+    const int i[10];
+    __hlsl_resource_t h;
+};
+_Static_assert(__builtin_hlsl_is_intangible(ABuffer), "");
+_Static_assert(__builtin_hlsl_is_intangible(ABuffer[10]), "");
+
+struct MyStruct {
+    half2 h2;
+    int3 i3;
+};
+_Static_assert(!__builtin_hlsl_is_intangible(MyStruct), "");
+_Static_assert(!__builtin_hlsl_is_intangible(MyStruct[10]), "");
+
+class MyClass {
+    int3 ivec;
+    float farray[12];
+    MyStruct ms;
+    ABuffer buf;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyClass), "");
+_Static_assert(__builtin_hlsl_is_intangible(MyClass[2]), "");
+
+union U {
+    double d[4];
+    Res buf;
+};
+_Static_assert(__builtin_hlsl_is_intangible(U), "");
+_Static_assert(__builtin_hlsl_is_intangible(U[100]), "");
+
+class MyClass2 {
+    int3 ivec;
+    float farray[12];
+    U u;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyClass2), "");
+_Static_assert(__builtin_hlsl_is_intangible(MyClass2[5]), "");
+
+class Simple {
+    int a;
+};
+
+template<typename T> struct TemplatedBuffer {
+    T a;
+    __hlsl_resource_t h;
+};
+_Static_assert(__builtin_hlsl_is_intangible(TemplatedBuffer<int>), "");
+
+struct MyStruct2 : TemplatedBuffer<float> {
+    float x;
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyStruct2), "");
+
+struct MyStruct3 {
+    const TemplatedBuffer<float> TB[10];
+};
+_Static_assert(__builtin_hlsl_is_intangible(MyStruct3), "");
+
+template<typename T> struct SimpleTemplate {
+    T a;
+};
+_Static_assert(__builtin_hlsl_is_intangible(SimpleTemplate<__hlsl_resource_t>), "");
+_Static_assert(!__builtin_hlsl_is_intangible(SimpleTemplate<float>), "");
+
+_Static_assert(__builtin_hlsl_is_intangible(RWBuffer<float>), "");
+_Static_assert(__builtin_hlsl_is_intangible(StructuredBuffer<Simple>), "");
diff --git a/clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl b/clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl
index 33614e87640d..de9ac90b895f 100644
--- a/clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl
+++ b/clang/test/SemaHLSL/Types/Traits/IsIntangibleTypeErrors.hlsl
@@ -1,12 +1,12 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library  -finclude-default-header -verify %s
-
-struct Undefined; // expected-note {{forward declaration of 'Undefined'}}
-_Static_assert(!__builtin_hlsl_is_intangible(Undefined), ""); // expected-error{{incomplete type 'Undefined' used in type trait expression}}
-
-void fn(int X) { // expected-note {{declared here}}
-  // expected-error@#vla {{variable length arrays are not supported for the current target}}
-  // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_intangible'}}
-  // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}}
-  // expected-note@#vla {{function parameter 'X' with unknown value cannot be used in a constant expression}}
-  _Static_assert(!__builtin_hlsl_is_intangible(int[X]), ""); // #vla
-}
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library  -finclude-default-header -verify %s
+
+struct Undefined; // expected-note {{forward declaration of 'Undefined'}}
+_Static_assert(!__builtin_hlsl_is_intangible(Undefined), ""); // expected-error{{incomplete type 'Undefined' used in type trait expression}}
+
+void fn(int X) { // expected-note {{declared here}}
+  // expected-error@#vla {{variable length arrays are not supported for the current target}}
+  // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_intangible'}}
+  // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}}
+  // expected-note@#vla {{function parameter 'X' with unknown value cannot be used in a constant expression}}
+  _Static_assert(!__builtin_hlsl_is_intangible(int[X]), ""); // #vla
+}
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
index 4e50f70952ad..760c057630a7 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
@@ -1,42 +1,42 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
-
-// expected-error@+1{{binding type 't' only applies to SRV resources}}
-float f1 : register(t0);
-
-// expected-error@+1 {{binding type 'u' only applies to UAV resources}}
-float f2 : register(u0);
-
-// expected-error@+1{{binding type 'b' only applies to constant buffers. The 'bool constant' binding type is no longer supported}}
-float f3 : register(b9);
-
-// expected-error@+1 {{binding type 's' only applies to sampler state}}
-float f4 : register(s0);
-
-// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
-float f5 : register(i9);
-
-// expected-error@+1{{binding type 'x' is invalid}}
-float f6 : register(x9);
-
-cbuffer g_cbuffer1 {
-// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
-    float f7 : register(c2);
-};
-
-tbuffer g_tbuffer1 {
-// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
-    float f8 : register(c2);
-};
-
-cbuffer g_cbuffer2 {
-// expected-error@+1{{binding type 'b' only applies to constant buffer resources}}
-    float f9 : register(b2);
-};
-
-tbuffer g_tbuffer2 {
-// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
-    float f10 : register(i2);
-};
-
-// expected-error@+1{{binding type 'c' only applies to numeric variables in the global scope}}
-RWBuffer<float> f11 : register(c3);
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+// expected-error@+1{{binding type 't' only applies to SRV resources}}
+float f1 : register(t0);
+
+// expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+float f2 : register(u0);
+
+// expected-error@+1{{binding type 'b' only applies to constant buffers. The 'bool constant' binding type is no longer supported}}
+float f3 : register(b9);
+
+// expected-error@+1 {{binding type 's' only applies to sampler state}}
+float f4 : register(s0);
+
+// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
+float f5 : register(i9);
+
+// expected-error@+1{{binding type 'x' is invalid}}
+float f6 : register(x9);
+
+cbuffer g_cbuffer1 {
+// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
+    float f7 : register(c2);
+};
+
+tbuffer g_tbuffer1 {
+// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
+    float f8 : register(c2);
+};
+
+cbuffer g_cbuffer2 {
+// expected-error@+1{{binding type 'b' only applies to constant buffer resources}}
+    float f9 : register(b2);
+};
+
+tbuffer g_tbuffer2 {
+// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
+    float f10 : register(i2);
+};
+
+// expected-error@+1{{binding type 'c' only applies to numeric variables in the global scope}}
+RWBuffer<float> f11 : register(c3);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_other.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_other.hlsl
index 503c8469666f..4c9e9a6b44c9 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_other.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_other.hlsl
@@ -1,9 +1,9 @@
-// RUN: not %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s  | FileCheck %s
-
-// XFAIL: *
-// This expectedly fails because RayQuery is an unsupported type.
-// When it becomes supported, we should expect an error due to 
-// the variable type being classified as "other", and according
-// to the spec, err_hlsl_unsupported_register_type_and_variable_type
-// should be emitted.
-RayQuery<0> r1: register(t0);
+// RUN: not %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s  | FileCheck %s
+
+// XFAIL: *
+// This expectedly fails because RayQuery is an unsupported type.
+// When it becomes supported, we should expect an error due to 
+// the variable type being classified as "other", and according
+// to the spec, err_hlsl_unsupported_register_type_and_variable_type
+// should be emitted.
+RayQuery<0> r1: register(t0);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
index ea43e27b5b5a..4b6af47c0ab7 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_resource.hlsl
@@ -1,49 +1,49 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
-
-// This test validates the diagnostics that are emitted when a variable with a "resource" type
-// is bound to a register using the register annotation
-
-
-template<typename T>
-struct MyTemplatedSRV {
-  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
-};
-
-struct MySRV {
-  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
-};
-
-struct MySampler {
-  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
-};
-
-struct MyUAV {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
-};
-
-struct MyCBuffer {
-  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
-};
-
-
-// expected-error@+1  {{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
-MySRV invalid : register(i2);
-
-// expected-error@+1  {{binding type 't' only applies to SRV resources}}
-MyUAV a : register(t2, space1);
-
-// expected-error@+1  {{binding type 'u' only applies to UAV resources}}
-MySampler b : register(u2, space1);
-
-// expected-error@+1  {{binding type 'b' only applies to constant buffer resources}}
-MyTemplatedSRV<int> c : register(b2);
-
-// expected-error@+1  {{binding type 's' only applies to sampler state}}
-MyUAV d : register(s2, space1);
-
-// empty binding prefix cases:
-// expected-error@+1 {{expected identifier}}
-MyTemplatedSRV<int> e: register();
-
-// expected-error@+1 {{expected identifier}}
-MyTemplatedSRV<int> f: register("");
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+// This test validates the diagnostics that are emitted when a variable with a "resource" type
+// is bound to a register using the register annotation
+
+
+template<typename T>
+struct MyTemplatedSRV {
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
+};
+
+struct MySRV {
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
+};
+
+struct MySampler {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
+};
+
+struct MyUAV {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
+};
+
+struct MyCBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
+};
+
+
+// expected-error@+1  {{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
+MySRV invalid : register(i2);
+
+// expected-error@+1  {{binding type 't' only applies to SRV resources}}
+MyUAV a : register(t2, space1);
+
+// expected-error@+1  {{binding type 'u' only applies to UAV resources}}
+MySampler b : register(u2, space1);
+
+// expected-error@+1  {{binding type 'b' only applies to constant buffer resources}}
+MyTemplatedSRV<int> c : register(b2);
+
+// expected-error@+1  {{binding type 's' only applies to sampler state}}
+MyUAV d : register(s2, space1);
+
+// empty binding prefix cases:
+// expected-error@+1 {{expected identifier}}
+MyTemplatedSRV<int> e: register();
+
+// expected-error@+1 {{expected identifier}}
+MyTemplatedSRV<int> f: register("");
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_silence_diags.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_silence_diags.hlsl
index 7f248e30c070..e63f264452da 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_silence_diags.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_silence_diags.hlsl
@@ -1,27 +1,27 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only -Wno-legacy-constant-register-binding %s -verify
-
-// expected-no-diagnostics
-float f2 : register(b9);
-
-float f3 : register(i9);
-
-cbuffer g_cbuffer1 {
-    float f4 : register(c2);
-};
-
-
-struct Eg12{
-  RWBuffer<int> a;  
-};
-
-Eg12 e12 : register(c9);
-
-Eg12 bar : register(i1);
-
-struct Eg7 {
-  struct Bar {
-    float f;
-  };
-  Bar b;
-};
-Eg7 e7 : register(t0);
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only -Wno-legacy-constant-register-binding %s -verify
+
+// expected-no-diagnostics
+float f2 : register(b9);
+
+float f3 : register(i9);
+
+cbuffer g_cbuffer1 {
+    float f4 : register(c2);
+};
+
+
+struct Eg12{
+  RWBuffer<int> a;  
+};
+
+Eg12 e12 : register(c9);
+
+Eg12 bar : register(i1);
+
+struct Eg7 {
+  struct Bar {
+    float f;
+  };
+  Bar b;
+};
+Eg7 e7 : register(t0);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl
index 3001dbb1e3ec..70e64e6ca752 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl
@@ -1,62 +1,62 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
-
-// valid
-cbuffer cbuf {
-    RWBuffer<int> r : register(u0, space0);
-}
-
-cbuffer cbuf2 {
-    struct x {
-        // this test validates that no diagnostic is emitted on the space parameter, because
-        // this register annotation is not in the global scope.
-        // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
-        RWBuffer<int> E : register(u2, space3);
-    };
-}
-
-struct MyStruct {
-    RWBuffer<int> E;
-};
-
-cbuffer cbuf3 {
-  // valid
-  MyStruct E : register(u2, space3);
-}
-
-// valid
-MyStruct F : register(u3, space4);
-
-cbuffer cbuf4 {
-  // this test validates that no diagnostic is emitted on the space parameter, because
-  // this register annotation is not in the global scope.
-  // expected-error@+1 {{binding type 'u' only applies to UAV resources}}
-  float a : register(u2, space3); 
-}
-
-// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}}
-cbuffer a : register(b0, s2) {
-
-}
-
-// expected-error@+1 {{invalid space specifier 'spaces' used; expected 'space' followed by an integer, like space1}}
-cbuffer b : register(b2, spaces) {
-
-}
-
-// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}}
-cbuffer c : register(b2, space 3) {}
-
-// expected-error@+1 {{register space cannot be specified on global constants}}
-int d : register(c2, space3);
-
-// expected-error@+1 {{register space cannot be specified on global constants}}
-int e : register(c2, space0);
-
-// expected-error@+1 {{register space cannot be specified on global constants}}
-int f : register(c2, space00);
-
-// valid
-RWBuffer<int> g : register(u2, space0);
-
-// valid
-RWBuffer<int> h : register(u2, space0);
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+// valid
+cbuffer cbuf {
+    RWBuffer<int> r : register(u0, space0);
+}
+
+cbuffer cbuf2 {
+    struct x {
+        // this test validates that no diagnostic is emitted on the space parameter, because
+        // this register annotation is not in the global scope.
+        // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
+        RWBuffer<int> E : register(u2, space3);
+    };
+}
+
+struct MyStruct {
+    RWBuffer<int> E;
+};
+
+cbuffer cbuf3 {
+  // valid
+  MyStruct E : register(u2, space3);
+}
+
+// valid
+MyStruct F : register(u3, space4);
+
+cbuffer cbuf4 {
+  // this test validates that no diagnostic is emitted on the space parameter, because
+  // this register annotation is not in the global scope.
+  // expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+  float a : register(u2, space3); 
+}
+
+// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}}
+cbuffer a : register(b0, s2) {
+
+}
+
+// expected-error@+1 {{invalid space specifier 'spaces' used; expected 'space' followed by an integer, like space1}}
+cbuffer b : register(b2, spaces) {
+
+}
+
+// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}}
+cbuffer c : register(b2, space 3) {}
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int d : register(c2, space3);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int e : register(c2, space0);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int f : register(c2, space00);
+
+// valid
+RWBuffer<int> g : register(u2, space0);
+
+// valid
+RWBuffer<int> h : register(u2, space0);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
index 235004102a53..40517f393e12 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_udt.hlsl
@@ -1,135 +1,135 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
-
-template<typename T>
-struct MyTemplatedUAV {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
-};
-
-struct MySRV {
-  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
-};
-
-struct MySampler {
-  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
-};
-
-struct MyUAV {
-  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
-};
-
-struct MyCBuffer {
-  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
-};
-
-// Valid: f is skipped, SRVBuf is bound to t0, UAVBuf is bound to u0
-struct Eg1 {
-  float f;
-  MySRV SRVBuf;
-  MyUAV UAVBuf;
-  };
-Eg1 e1 : register(t0) : register(u0); 
-
-// Valid: f is skipped, SRVBuf is bound to t0, UAVBuf is bound to u0. 
-// UAVBuf2 gets automatically assigned to u1 even though there is no explicit binding for u1.
-struct Eg2 {
-  float f;
-  MySRV SRVBuf;
-  MyUAV UAVBuf;
-  MyUAV UAVBuf2;
-  };
-Eg2 e2 : register(t0) : register(u0); 
-
-// Valid: Bar, the struct within Eg3, has a valid resource that can be bound to t0. 
-struct Eg3 {
-  struct Bar {
-    MyUAV a;
-  };
-  Bar b;
-};
-Eg3 e3 : register(u0);
-
-// Valid: the first sampler state object within 's' is bound to slot 5
-struct Eg4 {
-  MySampler s[3];
-};
-
-Eg4 e4 : register(s5);
-
-
-struct Eg5 {
-  float f;
-}; 
-// expected-warning@+1{{binding type 't' only applies to types containing SRV resources}}
-Eg5 e5 : register(t0);
-
-struct Eg6 {
-  float f;
-}; 
-// expected-warning@+1{{binding type 'u' only applies to types containing UAV resources}}
-Eg6 e6 : register(u0);
-
-struct Eg7 {
-  float f;
-}; 
-// expected-warning@+1{{binding type 'b' only applies to types containing constant buffer resources}}
-Eg7 e7 : register(b0);
-
-struct Eg8 {
-  float f;
-}; 
-// expected-warning@+1{{binding type 's' only applies to types containing sampler state}}
-Eg8 e8 : register(s0);
-
-struct Eg9 {
-  MySRV s;
-}; 
-// expected-warning@+1{{binding type 'c' only applies to types containing numeric types}}
-Eg9 e9 : register(c0);
-
-struct Eg10{
-  // expected-error@+1{{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
-  MyTemplatedUAV<int> a : register(u9);
-};
-Eg10 e10;
-
-
-template<typename R>
-struct Eg11 {
-    R b;
-};
-// expected-warning@+1{{binding type 'u' only applies to types containing UAV resources}}
-Eg11<MySRV> e11 : register(u0);
-// invalid because after template expansion, there are no valid resources inside Eg11 to bind as a UAV, only an SRV
-
-
-struct Eg12{
-  MySRV s1;
-  MySRV s2;
-};
-// expected-warning@+2{{binding type 'u' only applies to types containing UAV resources}}
-// expected-error@+1{{binding type 'u' cannot be applied more than once}}
-Eg12 e12 : register(u9) : register(u10);
-
-struct Eg13{
-  MySRV s1;
-  MySRV s2;
-};
-// expected-warning@+3{{binding type 'u' only applies to types containing UAV resources}}
-// expected-error@+2{{binding type 'u' cannot be applied more than once}}
-// expected-error@+1{{binding type 'u' cannot be applied more than once}}
-Eg13 e13 : register(u9) : register(u10) : register(u11);
-
-// expected-error@+1{{binding type 't' cannot be applied more than once}}
-Eg13 e13_2 : register(t11) : register(t12);
-
-struct Eg14{
- MyTemplatedUAV<int> r1;  
-};
-// expected-warning@+1{{binding type 't' only applies to types containing SRV resources}}
-Eg14 e14 : register(t9);
-
-struct Eg15 {
-  float f[4];
-}; 
-// expected no error
-Eg15 e15 : register(c0);
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+template<typename T>
+struct MyTemplatedUAV {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
+};
+
+struct MySRV {
+  __hlsl_resource_t [[hlsl::resource_class(SRV)]] x;
+};
+
+struct MySampler {
+  __hlsl_resource_t [[hlsl::resource_class(Sampler)]] x;
+};
+
+struct MyUAV {
+  __hlsl_resource_t [[hlsl::resource_class(UAV)]] x;
+};
+
+struct MyCBuffer {
+  __hlsl_resource_t [[hlsl::resource_class(CBuffer)]] x;
+};
+
+// Valid: f is skipped, SRVBuf is bound to t0, UAVBuf is bound to u0
+struct Eg1 {
+  float f;
+  MySRV SRVBuf;
+  MyUAV UAVBuf;
+  };
+Eg1 e1 : register(t0) : register(u0); 
+
+// Valid: f is skipped, SRVBuf is bound to t0, UAVBuf is bound to u0. 
+// UAVBuf2 gets automatically assigned to u1 even though there is no explicit binding for u1.
+struct Eg2 {
+  float f;
+  MySRV SRVBuf;
+  MyUAV UAVBuf;
+  MyUAV UAVBuf2;
+  };
+Eg2 e2 : register(t0) : register(u0); 
+
+// Valid: Bar, the struct within Eg3, has a valid resource that can be bound to t0. 
+struct Eg3 {
+  struct Bar {
+    MyUAV a;
+  };
+  Bar b;
+};
+Eg3 e3 : register(u0);
+
+// Valid: the first sampler state object within 's' is bound to slot 5
+struct Eg4 {
+  MySampler s[3];
+};
+
+Eg4 e4 : register(s5);
+
+
+struct Eg5 {
+  float f;
+}; 
+// expected-warning@+1{{binding type 't' only applies to types containing SRV resources}}
+Eg5 e5 : register(t0);
+
+struct Eg6 {
+  float f;
+}; 
+// expected-warning@+1{{binding type 'u' only applies to types containing UAV resources}}
+Eg6 e6 : register(u0);
+
+struct Eg7 {
+  float f;
+}; 
+// expected-warning@+1{{binding type 'b' only applies to types containing constant buffer resources}}
+Eg7 e7 : register(b0);
+
+struct Eg8 {
+  float f;
+}; 
+// expected-warning@+1{{binding type 's' only applies to types containing sampler state}}
+Eg8 e8 : register(s0);
+
+struct Eg9 {
+  MySRV s;
+}; 
+// expected-warning@+1{{binding type 'c' only applies to types containing numeric types}}
+Eg9 e9 : register(c0);
+
+struct Eg10{
+  // expected-error@+1{{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
+  MyTemplatedUAV<int> a : register(u9);
+};
+Eg10 e10;
+
+
+template<typename R>
+struct Eg11 {
+    R b;
+};
+// expected-warning@+1{{binding type 'u' only applies to types containing UAV resources}}
+Eg11<MySRV> e11 : register(u0);
+// invalid because after template expansion, there are no valid resources inside Eg11 to bind as a UAV, only an SRV
+
+
+struct Eg12{
+  MySRV s1;
+  MySRV s2;
+};
+// expected-warning@+2{{binding type 'u' only applies to types containing UAV resources}}
+// expected-error@+1{{binding type 'u' cannot be applied more than once}}
+Eg12 e12 : register(u9) : register(u10);
+
+struct Eg13{
+  MySRV s1;
+  MySRV s2;
+};
+// expected-warning@+3{{binding type 'u' only applies to types containing UAV resources}}
+// expected-error@+2{{binding type 'u' cannot be applied more than once}}
+// expected-error@+1{{binding type 'u' cannot be applied more than once}}
+Eg13 e13 : register(u9) : register(u10) : register(u11);
+
+// expected-error@+1{{binding type 't' cannot be applied more than once}}
+Eg13 e13_2 : register(t11) : register(t12);
+
+struct Eg14{
+ MyTemplatedUAV<int> r1;  
+};
+// expected-warning@+1{{binding type 't' only applies to types containing SRV resources}}
+Eg14 e14 : register(t9);
+
+struct Eg15 {
+  float f[4];
+}; 
+// expected no error
+Eg15 e15 : register(c0);
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
index b4b376fe0d11..b9767a7a03d0 100644
--- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
+++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
@@ -344,7 +344,7 @@ Expected<std::unique_ptr<lto::LTO>> createLTO(const ArgList &Args) {
   Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
   Conf.RemarksFormat = RemarksFormat;
 
-  Conf.MAttrs = {Args.getLastArgValue(OPT_feature, "").str()};
+  Conf.MAttrs = llvm::codegen::getMAttrs();
   std::optional<CodeGenOptLevel> CGOptLevelOrNone =
       CodeGenOpt::parseLevel(Args.getLastArgValue(OPT_O, "2")[0]);
   assert(CGOptLevelOrNone && "Invalid optimization level");
diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
index eeb9d1a62282..a80c5937b429 100644
--- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
+++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
@@ -47,9 +47,6 @@ def arch : Separate<["--", "-"], "arch">,
 def : Joined<["--", "-"], "plugin-opt=mcpu=">,
   Flags<[HelpHidden, WrapperOnlyOption]>, Alias<arch>;
 
-def feature : Separate<["--", "-"], "feature">, Flags<[WrapperOnlyOption]>,
-  HelpText<"Specify the '+ptx' freature to use for LTO.">;
-
 def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile.">;
 def debug : Flag<["--"], "debug">, Alias<g>;
 
diff --git a/clang/tools/scan-build/bin/scan-build.bat b/clang/tools/scan-build/bin/scan-build.bat
index f765f205b8ec..77be6746318f 100644
--- a/clang/tools/scan-build/bin/scan-build.bat
+++ b/clang/tools/scan-build/bin/scan-build.bat
@@ -1 +1 @@
-perl -S scan-build %*
+perl -S scan-build %*
diff --git a/clang/tools/scan-build/libexec/c++-analyzer.bat b/clang/tools/scan-build/libexec/c++-analyzer.bat
index 83c7172456a5..69f048a91671 100644
--- a/clang/tools/scan-build/libexec/c++-analyzer.bat
+++ b/clang/tools/scan-build/libexec/c++-analyzer.bat
@@ -1 +1 @@
-perl -S c++-analyzer %*
+perl -S c++-analyzer %*
diff --git a/clang/tools/scan-build/libexec/ccc-analyzer.bat b/clang/tools/scan-build/libexec/ccc-analyzer.bat
index fdd36f3bdd04..2a85376eb82b 100644
--- a/clang/tools/scan-build/libexec/ccc-analyzer.bat
+++ b/clang/tools/scan-build/libexec/ccc-analyzer.bat
@@ -1 +1 @@
-perl -S ccc-analyzer %*
+perl -S ccc-analyzer %*
diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis
index 611c20dacce1..a7c70186bc46 100644
--- a/clang/utils/ClangVisualizers/clang.natvis
+++ b/clang/utils/ClangVisualizers/clang.natvis
@@ -1,1089 +1,1089 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-Visual Studio Native Debugging Visualizers for LLVM
-
-For Visual Studio 2013 only, put this file into
-"%USERPROFILE%\Documents\Visual Studio 2013\Visualizers" or create a symbolic link so it updates automatically.
-
-For later versions of Visual Studio, no setup is required-->
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
-  <Type Name="clang::Type">
-    <!-- To visualize clang::Types, we need to look at TypeBits.TC to determine the actual
-         type subclass and manually dispatch accordingly (Visual Studio can't identify the real type
-         because clang::Type has no virtual members hence no RTTI).
-
-         Views:
-           "cmn": Visualization that is common to all clang::Type subclasses
-           "poly": Visualization that is specific to the actual clang::Type subclass. The subtype-specific
-                   <DisplayString> is typically as C++-like as possible (like in dump()) with <Expand>
-                   containing all the gory details.
-           "cpp": Only occasionally used when we need to distinguish between an ordinary view and a C++-like view.
-    -->
-    <DisplayString IncludeView="cmn" Condition="TypeBits.TC==clang::LocInfoType::LocInfo">LocInfoType</DisplayString>
-    <DisplayString IncludeView="cmn">{(clang::Type::TypeClass)TypeBits.TC, en}Type</DisplayString>
-    <!-- Dispatch to visualizers for the actual Type subclass -->
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Builtin" IncludeView="poly">{*(clang::BuiltinType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Pointer" IncludeView="poly">{*(clang::PointerType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Paren" IncludeView="poly">{*(clang::ParenType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::BitInt" IncludeView="poly">{(clang::BitIntType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::LValueReference" IncludeView="poly">{*(clang::LValueReferenceType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::RValueReference" IncludeView="poly">{*(clang::RValueReferenceType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="poly">{(clang::ConstantArrayType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="left">{(clang::ConstantArrayType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="right">{(clang::ConstantArrayType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="poly">{(clang::VariableArrayType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="left">{(clang::VariableArrayType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="right">{(clang::VariableArrayType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="poly">{(clang::IncompleteArrayType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="left">{(clang::IncompleteArrayType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="right">{(clang::IncompleteArrayType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Typedef" IncludeView="poly">{(clang::TypedefType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Typedef" IncludeView="cpp">{(clang::TypedefType *)this,view(cpp)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Attributed" IncludeView="poly">{*(clang::AttributedType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="poly">{(clang::DecayedType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="left">{(clang::DecayedType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="right">{(clang::DecayedType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="poly">{(clang::ElaboratedType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="left">{(clang::ElaboratedType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="right">{(clang::ElaboratedType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm" IncludeView="poly">{*(clang::TemplateTypeParmType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm" IncludeView="cpp">{*(clang::TemplateTypeParmType *)this,view(cpp)}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::SubstTemplateTypeParm" IncludeView="poly">{*(clang::SubstTemplateTypeParmType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Record" IncludeView="poly">{*(clang::RecordType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Record" IncludeView="cpp">{*(clang::RecordType *)this,view(cpp)}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="poly">{(clang::FunctionProtoType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="left">{(clang::FunctionProtoType *)this,view(left)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="right">{(clang::FunctionProtoType *)this,view(right)na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateSpecialization" IncludeView="poly">{*(clang::TemplateSpecializationType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization" IncludeView="poly">{*(clang::DeducedTemplateSpecializationType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization" IncludeView="cpp">{*(clang::DeducedTemplateSpecializationType *)this,view(cpp)}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::InjectedClassName" IncludeView="poly">{*(clang::InjectedClassNameType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DependentName" IncludeView="poly">{*(clang::DependentNameType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::PackExpansion" IncludeView="poly">{*(clang::PackExpansionType *)this}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::LocInfoType::LocInfo" IncludeView="poly">{(clang::LocInfoType *)this,na}</DisplayString>
-    <DisplayString Condition="TypeBits.TC==clang::LocInfoType::LocInfo" IncludeView="cpp">{(clang::LocInfoType *)this,view(cpp)na}</DisplayString>
-    <DisplayString IncludeView="cpp">{this,view(poly)na}</DisplayString>
-    <DisplayString IncludeView="left">{*this,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="right"></DisplayString>
-    <DisplayString IncludeView="poly">No visualizer yet for {(clang::Type::TypeClass)TypeBits.TC,en}Type</DisplayString> <!-- Not yet implemented Type subclass -->
-    <DisplayString IncludeView="Dependence" Condition="TypeBits.Dependence">Dependence{" ",en}</DisplayString>
-    <DisplayString IncludeView="Dependence"></DisplayString>
-    <DisplayString IncludeView="Cache" Condition="TypeBits.CacheValid &amp;&amp; TypeBits.CachedLocalOrUnnamed">CachedLinkage: {(clang::Linkage)TypeBits.CachedLinkage,en} CachedLocalOrUnnamed</DisplayString>
-    <DisplayString IncludeView="Cache" Condition="TypeBits.CacheValid &amp;&amp; !TypeBits.CachedLocalOrUnnamed">CachedLinkage: {(clang::Linkage)TypeBits.CachedLinkage,en}{" ",sb}</DisplayString>
-    <DisplayString IncludeView="Cache"></DisplayString>
-    <DisplayString IncludeView="FromAST" Condition="TypeBits.FromAST">FromAST</DisplayString>
-    <DisplayString IncludeView="FromAST"></DisplayString>
-    <DisplayString IncludeView="flags" Condition="!TypeBits.Dependence &amp;&amp; !TypeBits.CacheValid &amp;&amp; !TypeBits.FromAST">
-      No TypeBits set beyond TypeClass
-    </DisplayString>
-    <DisplayString IncludeView="flags">{*this, view(Dependence)}{*this, view(Cache)}{*this, view(FromAST)}</DisplayString>
-    <DisplayString>{*this,view(cmn)}  {{{*this,view(poly)}}}</DisplayString>
-    <Expand>
-      <Item Name="TypeClass" IncludeView="cmn">(clang::Type::TypeClass)TypeBits.TC</Item>
-      <Item Name="Flags" IncludeView="cmn">this,view(flags)na</Item>
-      <Item Name="Canonical" IncludeView="cmn">CanonicalType</Item>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Builtin">*(clang::BuiltinType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Pointer">*(clang::PointerType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Paren">*(clang::ParenType*)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::BitInt">*(clang::BitIntType*)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::LValueReference">*(clang::LValueReferenceType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::RValueReference">*(clang::RValueReferenceType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray">(clang::ConstantArrayType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray">(clang::VariableArrayType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray">(clang::IncompleteArrayType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Attributed">*(clang::AttributedType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Decayed">(clang::DecayedType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated">(clang::ElaboratedType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm">(clang::TemplateTypeParmType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::SubstTemplateTypeParm">(clang::SubstTemplateTypeParmType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Record">(clang::RecordType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto">(clang::FunctionProtoType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::TemplateSpecialization">(clang::TemplateSpecializationType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization">(clang::DeducedTemplateSpecializationType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::InjectedClassName">(clang::InjectedClassNameType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::DependentName">(clang::DependentNameType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::PackExpansion">(clang::PackExpansionType *)this</ExpandedItem>
-      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::LocInfoType::LocInfo">(clang::LocInfoType *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::ArrayType">
-    <Expand>
-      <Item Name="ElementType">ElementType</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::ConstantArrayType">
-    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="right">[{Size}]</DisplayString>
-    <DisplayString>{ElementType,view(cpp)}[{Size}]</DisplayString>
-    <Expand>
-      <Item Name="Size">Size</Item>
-      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::IncompleteArrayType">
-    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="right">[]</DisplayString>
-    <DisplayString>{ElementType,view(cpp)}[]</DisplayString>
-    <Expand>
-      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::VariableArrayType">
-    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="right">[*]</DisplayString>
-    <DisplayString>{ElementType,view(cpp)}[*]</DisplayString>
-    <Expand>
-      <Item Name="[Size Expression]">(clang::Expr *)SizeExpr</Item>
-      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TypedefType">
-    <DisplayString IncludeView="cpp">{Decl,view(name)nd}</DisplayString>
-    <DisplayString>{Decl}</DisplayString>
-    <Expand>
-      <Item Name="Decl">Decl</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::PointerType">
-    <DisplayString>{PointeeType, view(cpp)} *</DisplayString>
-    <Expand>
-      <Item Name="PointeeType">PointeeType</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::ParenType">
-    <DisplayString>{Inner, view(cpp)}</DisplayString>
-    <Expand>
-      <Item Name="Inner">Inner</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::BitIntType">
-    <DisplayString Condition="!IsUnsigned">signed _BitInt({NumBits})</DisplayString>
-    <DisplayString Condition="!IsUnsigned">unsigned _BitInt({NumBits})(</DisplayString>
-    <Expand>
-      <Item Name="NumBits">NumBits</Item>
-      <ExpandedItem>(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <!-- We visualize all inner types for clang reference types. So a rvalue reference to an lvalue reference
-       to an int  would visual as int &amp; &amp;&amp; This is a little different than GetPointeeType(),
-       but more clearly displays the data structure and seems natural -->
-  <Type Name="clang::LValueReferenceType">
-    <DisplayString>{((clang::ReferenceType *)this)-&gt;PointeeType,view(cpp)} &amp;</DisplayString>
-    <Expand>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-      <Item Name="PointeeType">PointeeType</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::RValueReferenceType">
-    <DisplayString>{((clang::ReferenceType *)this)-&gt;PointeeType,view(cpp)} &amp;&amp;</DisplayString>
-    <Expand>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-      <Item Name="PointeeType">PointeeType</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::AttributedType">
-    <DisplayString>{ModifiedType} Attribute={(clang::AttributedType::Kind)AttributedTypeBits.AttrKind}</DisplayString>
-  </Type>
-
-  <!-- Unfortunately, Visual Studio has trouble seeing the PointerBitMask member PointerIntUnion, so I hardwire it to 2 bits-->
-  <Type Name="clang::DeclContext">
-    <DisplayString>{(clang::Decl::Kind)DeclContextBits.DeclKind,en}Decl</DisplayString>
-    <Expand>
-      <Item Name="DeclKind">(clang::Decl::Kind)DeclContextBits.DeclKind,en</Item>
-      <Synthetic Name="Members">
-        <DisplayString></DisplayString>
-        <Expand>
-          <LinkedListItems>
-            <HeadPointer>FirstDecl</HeadPointer>
-            <NextPointer>(clang::Decl *)(*(intptr_t *)NextInContextAndBits.Value.Data &amp; ~3)</NextPointer>
-            <ValueNode>*this</ValueNode>
-          </LinkedListItems>
-        </Expand>
-      </Synthetic>
-    </Expand>
-  </Type>
-  <Type Name="clang::FieldDecl">
-    <DisplayString>Field {{{*(clang::DeclaratorDecl *)this,view(cpp)nd}}}</DisplayString>
-  </Type>
-  <Type Name="clang::CXXMethodDecl">
-    <DisplayString IncludeView="cpp">{*(clang::FunctionDecl *)this,nd}</DisplayString>
-    <DisplayString>Method {{{*this,view(cpp)}}}</DisplayString>
-  </Type>
-  <Type Name="clang::CXXConstructorDecl">
-    <DisplayString>Constructor {{{Name,view(cpp)}({*(clang::FunctionDecl *)this,view(parm0)nd})}}</DisplayString>
-  </Type>
-  <Type Name="clang::CXXDestructorDecl">
-    <DisplayString>Destructor {{~{Name,view(cpp)}()}}</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateTypeParmDecl">
-    <DisplayString IncludeView="TorC" Condition="Typename">typename</DisplayString>
-    <DisplayString IncludeView="TorC" Condition="!Typename">class</DisplayString>
-    <DisplayString IncludeView="MaybeEllipses" Condition="TypeForDecl == nullptr">(not yet known if parameter pack) </DisplayString>
-    <DisplayString IncludeView="MaybeEllipses" Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)TypeForDecl-&gt;CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType)->CanTTPTInfo.ParameterPack">...</DisplayString>
-    <DisplayString IncludeView="MaybeEllipses" Condition="!((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)TypeForDecl-&gt;CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType)->CanTTPTInfo.ParameterPack"></DisplayString>
-    <DisplayString IncludeView="DefaultArg" Condition="(*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data &amp; 3LL) == 0">{(TypeSourceInfo *)(*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data&amp;~3LL),view(cpp)}</DisplayString>
-    <DisplayString IncludeView="DefaultArg">{{InheritedInitializer}}</DisplayString>
-    <DisplayString IncludeView="Initializer" Condition="*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data &amp; 3LL">= {this,view(DefaultArg)na}</DisplayString>
-    <DisplayString IncludeView="Initializer"></DisplayString>
-    <DisplayString>{*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)} {this,view(Initializer)na}</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateDecl">
-    <DisplayString IncludeView="cpp">{*TemplatedDecl,view(cpp)}</DisplayString>
-    <DisplayString>template{TemplateParams,na} {*TemplatedDecl};</DisplayString>
-    <Expand>
-      <Item Name="TemplateParams">TemplateParams,na</Item>
-      <Item Name="TemplatedDecl">TemplatedDecl,na</Item>
-    </Expand>
-  </Type>
-  <!-- Unfortunately, visualization of PointerIntPair<PointerUnion> doesn't work due to limitations in natvis, so we will barehad it-->
-  <Type Name="clang::TypedefNameDecl">
-    <DisplayString Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)==0" IncludeView="type">{(clang::TypeSourceInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL),view(cpp)na}</DisplayString>
-    <DisplayString Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)!=0" IncludeView="type">{(clang::TypedefNameDecl::ModedTInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL),view(cpp)na}</DisplayString>
-    <DisplayString IncludeView="name">{(TypeDecl *)this,view(cpp)nand}</DisplayString>
-    <DisplayString>typedef {this,view(type)na} {this,view(name)na};</DisplayString>
-    <Expand>
-      <Item Name="IsTransparent" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 1)==0">"Not yet calculated",sb</Item>
-      <Item Name="IsTransparent" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 1)!=0">(bool)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 2)</Item>
-      <Item Name="TypeSourceInfo" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)==0">(clang::TypeSourceInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL)</Item>
-      <Item Name="ModedTInfo" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)!=0">(clang::TypedefNameDecl::ModedTInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL)</Item>
-      <ExpandedItem>(TypeDecl *)this,nd</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TypeAliasDecl">
-    <DisplayString IncludeView="cpp">{(TypedefNameDecl *)this,view(name)nand}</DisplayString>
-    <DisplayString>using {(TypedefNameDecl *)this,view(name)nand} = {(TypedefNameDecl *)this,view(type)nand}</DisplayString>
-  </Type>
-  <Type Name="clang::AssumedTemplateStorage">
-    <DisplayString>{Name}</DisplayString>
-  </Type>
-  <Type Name="clang::UncommonTemplateNameStorage::BitsTag">
-    <DisplayString>Kind={(UncommonTemplateNameStorage::Kind)Kind,en}, Size={Size}</DisplayString>
-    <Expand>
-      <Item Name="Kind">(UncommonTemplateNameStorage::Kind)Kind</Item>
-      <Item Name="Size">Size</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::UncommonTemplateNameStorage">
-    <DisplayString IncludeView="cmn">{Bits},</DisplayString>
-    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::Overloaded">{this,view(cmn)na},{(OverloadedTemplateStorage*)this,na}</DisplayString>
-    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::Assumed">{this,view(cmn)na},{(AssumedTemplateStorage*)this,na}</DisplayString>
-    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParm">{this,view(cmn)na},{(SubstTemplateTemplateParmStorage*)this,na}</DisplayString>
-    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParmPack">{this,view(cmn)na},{(SubstTemplateTemplateParmPackStorage*)this,na}</DisplayString>
-    <DisplayString>{this,view(cmn)na}</DisplayString>
-    <Expand>
-      <Item Name="Bits">Bits</Item>
-      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::Overloaded">(OverloadedTemplateStorage*)this</ExpandedItem>
-      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::Assumed">(AssumedTemplateStorage*)this</ExpandedItem>
-      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParm">(SubstTemplateTemplateParmStorage*)this</ExpandedItem>
-      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParmPack">(SubstTemplateTemplateParmPackStorage*)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <!-- clang::TemplateName::StorageType -->
-  <Type Name="llvm::PointerUnion&lt;clang::TemplateDecl *, clang::UncommonTemplateNameStorage *,
-                          clang::QualifiedTemplateName *, clang::DependentTemplateName *&gt;">
-    <!-- Expand this out by hand to get cpp view -->
-    <DisplayString Condition="(Val.Value &amp;3) == 0" IncludeView="cpp">
-      {(clang::TemplateDecl *)(Val.Value &amp; ~3LL),view(cpp)na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 0">
-      {(clang::TemplateDecl *)(Val.Value &amp; ~3LL),na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 1" IncludeView="cpp">
-      {(clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL),view(cpp)na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 1">
-      {(clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL),na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 2" IncludeView="cpp">
-      {(clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL),view(cpp)na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 2">
-      {(clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL),na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 3" IncludeView="cpp">
-      {(clang::DependentTemplateName *)(Val.Value &amp; ~3LL),view(cpp)na}
-    </DisplayString>
-    <DisplayString Condition="(Val.Value &amp;3) == 3">
-      {(clang::DependentTemplateName *)(Val.Value &amp; ~3LL),na}
-    </DisplayString>
-    <Expand>
-      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 0">"TemplateDecl",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 0">
-        (clang::TemplateDecl *)(Val.Value &amp; ~3LL)
-      </Item>
-      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 1">"UncommonTemplateNameStorage",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 1">
-        (clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL)
-      </Item>
-      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 2">"QualifiedTemplateName",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 2">
-        (clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL)
-      </Item>
-      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 3">"DependentTemplateName",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 3">
-        (clang::DependentTemplateName *)(Val.Value &amp; ~3LL)
-      </Item>
-      <Item Name="[Val]">Val</Item>
-
-    </Expand>
-  </Type>
-  <Type Name="clang::TemplateName">
-    <DisplayString IncludeView="cpp">{Storage,view(cpp)na}</DisplayString>
-    <DisplayString>{Storage,na}</DisplayString>
-    <Expand>
-      <ExpandedItem>Storage</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::NamedDecl" >
-    <DisplayString IncludeView="cpp">{Name,view(cpp)}</DisplayString>
-    <DisplayString>{Name}</DisplayString>
-  </Type>
-  <Type Name="clang::TagDecl">
-    <DisplayString IncludeView="implicit" Condition="Implicit">implicit{" ",sb}</DisplayString>
-    <DisplayString IncludeView="implicit"></DisplayString>
-    <DisplayString IncludeView="modifiers">{*this,view(implicit)nd}</DisplayString>
-    <DisplayString IncludeView="cpp">{*this,view(modifiers)}{Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Struct">{*this,view(modifiers)nd}struct {Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Interface">{*this,view(modifiers)nd}interface {Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Union">{*this,view(modifiers)nd}union {Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Class">{*this,view(modifiers)nd}class {Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Enum">{*this,view(modifiers)nd}enum {Name,view(cpp)}</DisplayString>
-    <Expand>
-      <ExpandedItem>(clang::DeclContext *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TagType">
-    <DisplayString IncludeView="cpp">{decl,view(cpp)na}</DisplayString>
-    <DisplayString>{*decl}</DisplayString>
-    <Expand>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-      <Item Name="decl">decl</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::RecordType">
-    <DisplayString IncludeView="cpp">{(clang::TagType *)this,view(cpp)na}</DisplayString>
-    <DisplayString>{(clang::TagType *)this,na}</DisplayString>
-    <Expand>
-      <Item Name="TagType">*(clang::TagType *)this</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::SubstTemplateTypeParmType">
-    <DisplayString>{{{*Replaced,view(cpp)} &lt;= {CanonicalType,view(cpp)}}}</DisplayString>
-    <Expand>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-      <Item Name="Replaced">*Replaced</Item>
-    </Expand>
-  </Type>
-  <!-- We only show the first 5 parameter types in the display string (can't figure out how to loop in DisplayString)
-       but the expansion has all parameters -->
-  <Type Name="clang::FunctionProtoType">
-    <DisplayString IncludeView="left" Condition="FunctionTypeBits.HasTrailingReturn"></DisplayString>
-    <DisplayString IncludeView="left">{ResultType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="parm0" Condition="FunctionTypeBits.NumParams==0"></DisplayString>
-    <DisplayString IncludeView="parm0">{*(clang::QualType *)(this+1),view(cpp)}{*this,view(parm1)}</DisplayString>
-    <DisplayString IncludeView="parm1" Condition="FunctionTypeBits.NumParams==1"></DisplayString>
-    <DisplayString IncludeView="parm1">, {*((clang::QualType *)(this+1)+1),view(cpp)}{*this,view(parm2)}</DisplayString>
-    <DisplayString IncludeView="parm2" Condition="FunctionTypeBits.NumParams==2"></DisplayString>
-    <DisplayString IncludeView="parm2">, {*((clang::QualType *)(this+1)+2),view(cpp)}{*this,view(parm3)}</DisplayString>
-    <DisplayString IncludeView="parm3" Condition="FunctionTypeBits.NumParams==3"></DisplayString>
-    <DisplayString IncludeView="parm3">, {*((clang::QualType *)(this+1)+3),view(cpp)}{*this,view(parm4)}</DisplayString>
-    <DisplayString IncludeView="parm4" Condition="FunctionTypeBits.NumParams==4"></DisplayString>
-    <DisplayString IncludeView="parm4">, {*((clang::QualType *)(this+1)+4),view(cpp)}{*this,view(parm5)}</DisplayString>
-    <DisplayString IncludeView="parm5" Condition="FunctionTypeBits.NumParams==5"></DisplayString>
-    <DisplayString IncludeView="parm5">, /* expand for more params */</DisplayString>
-    <DisplayString IncludeView="right" Condition="FunctionTypeBits.HasTrailingReturn">({*this,view(parm0)}) -&gt; {ResultType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="right">({*this,view(parm0)})</DisplayString>
-    <DisplayString>{this,view(left)na}{this,view(right)na}</DisplayString>
-    <Expand>
-      <Item Name="ResultType">ResultType</Item>
-      <Synthetic Name="Parameter Types">
-        <DisplayString>{*this,view(parm0)}</DisplayString>
-        <Expand>
-          <ArrayItems>
-            <Size>FunctionTypeBits.NumParams</Size>
-            <ValuePointer>(clang::QualType *)(this+1)</ValuePointer>
-          </ArrayItems>
-        </Expand>
-      </Synthetic>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-
-  <Type Name="clang::AdjustedType">
-    <DisplayString>{OriginalTy} adjusted to {AdjustedTy}</DisplayString>
-    <Expand>
-      <Item Name="OriginalTy">OriginalTy</Item>
-      <Item Name="AdjustedTy">AdjustedTy</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::DecayedType">
-    <DisplayString IncludeView="left">{OriginalTy,view(left)}</DisplayString>
-    <DisplayString IncludeView="right">{OriginalTy,view(right)}</DisplayString>
-    <DisplayString>{OriginalTy}</DisplayString>
-    <Expand>
-      <ExpandedItem>(clang::AdjustedType *)this</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::ElaboratedType">
-    <DisplayString IncludeView="left">{NamedType,view(left)}</DisplayString>
-    <DisplayString IncludeView="right">{NamedType,view(right)}</DisplayString>
-    <DisplayString>{NamedType}</DisplayString>
-    <Expand>
-      <Item Name="[Keyword]">(clang::ElaboratedTypeKeyword)TypeWithKeywordBits.Keyword</Item>
-      <Item Name="[Nested Name Specifier]">NNS</Item>
-      <Item Name="[Underlying Type]">NamedType,view(cmn)</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::TemplateTypeParmType">
-    <DisplayString IncludeView="cpp" Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType) != this">{TTPDecl->Name,view(cpp)}</DisplayString>
-    <DisplayString Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType) != this">Non-canonical: {*TTPDecl}</DisplayString>
-    <DisplayString>Canonical: {CanTTPTInfo}</DisplayString>
-    <Expand>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::InjectedClassNameType">
-    <DisplayString>{Decl,view(cpp)}</DisplayString>
-    <Expand>
-      <Item Name="Decl">Decl</Item>
-      <Item Name="InjectedType">InjectedType</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::DependentNameType">
-    <DisplayString>{NNS}{Name,view(cpp)na}</DisplayString>
-    <Expand>
-      <Item Name="NNS">NNS</Item>
-      <Item Name="Name">Name</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::NestedNameSpecifier">
-    <DisplayString Condition="!Specifier"></DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 0">{(IdentifierInfo*)Specifier,view(cpp)na}::</DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 1">{(NamedDecl*)Specifier,view(cpp)na}::</DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 2">{(Type*)Specifier,view(cpp)na}::</DisplayString>
-    <Expand>
-      <Item Name="Kind">(NestedNameSpecifier::StoredSpecifierKind)((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3)</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::PackExpansionType">
-    <DisplayString>{Pattern}</DisplayString>
-    <Expand>
-      <Item Name="Pattern">Pattern</Item>
-      <Item Name="NumExpansions">NumExpansions</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::QualType">
-    <DisplayString IncludeView="poly">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(poly)}{*this,view(fastQuals)}</DisplayString>
-    <DisplayString IncludeView="cpp">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(cpp)}{*this,view(fastQuals)}</DisplayString>
-    <DisplayString IncludeView="left">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(left)}{*this,view(fastQuals)}</DisplayString>
-    <DisplayString IncludeView="right">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(right)}{*this,view(fastQuals)}</DisplayString>
-    <!-- For the Fast Qualifiers, it is simpler (and probably more efficient) just to list all 8 cases than create
-          views for each qualifier. TODO: Non-fast qualifiers -->
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==0"></DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==1">{" ",sb}const</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==2">{" ",sb}restrict</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==3">{" ",sb}const restrict</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==4">{" ",sb}volatile</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==5">{" ",sb}const volatile</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==6">{" ",sb}volatile restrict</DisplayString>
-    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==7">{" ",sb}const volatile restrict</DisplayString>
-    <DisplayString IncludeView="fastQuals">Cannot visualize non-fast qualifiers</DisplayString>
-    <DisplayString Condition="(*(uintptr_t *)Value.Value.Data) == 0">Null</DisplayString>
-    <DisplayString>{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,na}{*this,view(fastQuals)}</DisplayString>
-    <Expand>
-      <Item Name="Fast Quals">*this,view(fastQuals)</Item>
-      <ExpandedItem>((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType</ExpandedItem>
-    </Expand>
-
-  </Type>
-  <Type Name="clang::LocInfoType">
-    <DisplayString IncludeView="cpp">{DeclInfo,view(cpp)na}</DisplayString>
-    <DisplayString>{DeclInfo,na}</DisplayString>
-    <Expand>
-      <Item Name="DeclInfo">DeclInfo</Item>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TypeSourceInfo">
-    <DisplayString IncludeView="cpp">{Ty,view(cpp)}</DisplayString>
-    <DisplayString>{Ty}</DisplayString>
-    <Expand>
-      <ExpandedItem>Ty</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TypeLoc">
-    <DisplayString>{(QualType *)&amp;Ty,na}</DisplayString>
-    <Expand>
-      <Item Name="Ty">(QualType *)&amp;Ty</Item>
-      <Item Name="Data">Data</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::TypeLocBuilder">
-    <DisplayString Optional="true" Condition="LastTy.Value.Value==0">Not building anything</DisplayString>
-    <DisplayString Optional="true">Building a {LastTy}</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateArgumentLoc">
-    <DisplayString IncludeView="cpp">{Argument,view(cpp)}</DisplayString>
-    <DisplayString>{Argument}</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateArgument">
-    <DisplayString IncludeView="cpp" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{*(clang::QualType *)&amp;TypeOrValue.V,view(cpp)}</DisplayString>
-    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en} template argument: {*(clang::QualType *)&amp;TypeOrValue.V}</DisplayString>
-    <DisplayString IncludeView="arg0" Condition="Args.NumArgs==0"></DisplayString>
-    <DisplayString IncludeView="arg0">{Args.Args[0]}{*this,view(arg1)}</DisplayString>
-    <DisplayString IncludeView="arg1" Condition="Args.NumArgs==1"></DisplayString>
-    <DisplayString IncludeView="arg1">, {Args.Args[1]}{*this,view(arg2)}</DisplayString>
-    <DisplayString IncludeView="arg2" Condition="Args.NumArgs==2"></DisplayString>
-    <DisplayString IncludeView="arg2">, {Args.Args[2]}, ...</DisplayString>
-    <DisplayString IncludeView="arg0cpp" Condition="Args.NumArgs==0"></DisplayString>
-    <DisplayString IncludeView="arg0cpp">{Args.Args[0],view(cpp)}{*this,view(arg1cpp)}</DisplayString>
-    <DisplayString IncludeView="arg1cpp" Condition="Args.NumArgs==1"></DisplayString>
-    <DisplayString IncludeView="arg1cpp">, {Args.Args[1],view(cpp)}{*this,view(arg2cpp)}</DisplayString>
-    <DisplayString IncludeView="arg2cpp" Condition="Args.NumArgs==2"></DisplayString>
-    <DisplayString IncludeView="arg2cpp">, {Args.Args[2],view(cpp)}, ...</DisplayString>
-    <DisplayString IncludeView="cpp" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">{*this,view(arg0cpp)}</DisplayString>
-    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">{*this,view(arg0)}</DisplayString>
-    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Expression">{(clang::Expr *)TypeOrValue.V,view(cpp)na}</DisplayString>
-    <DisplayString>{(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en}</DisplayString>
-    <Expand>
-      <Item Name="QualType" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">*(clang::QualType *)&amp;TypeOrValue.V</Item>
-      <Item Name="Expression" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Expression">(clang::Expr *)TypeOrValue.V</Item>
-      <ArrayItems Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">
-        <Size>Args.NumArgs</Size>
-        <ValuePointer>Args.Args</ValuePointer>
-      </ArrayItems>
-      <!-- TODO: Other kinds-->
-    </Expand>
-  </Type>
-  <Type Name="clang::TemplateArgumentListInfo">
-    <DisplayString IncludeView ="elt0" Condition="Arguments.Size == 0"></DisplayString>
-    <DisplayString IncludeView ="elt0">{((TemplateArgumentLoc*)Arguments.BeginX)[0],view(cpp)}{*this,view(elt1)}</DisplayString>
-    <DisplayString IncludeView ="elt1" Condition="Arguments.Size == 1"></DisplayString>
-    <DisplayString IncludeView ="elt1">, {((TemplateArgumentLoc*)Arguments.BeginX)[1],view(cpp)}{*this,view(elt2)}</DisplayString>
-    <DisplayString IncludeView ="elt2" Condition="Arguments.Size == 2"></DisplayString>
-    <DisplayString IncludeView ="elt2">, {((TemplateArgumentLoc*)Arguments.BeginX)[2],view(cpp)}{*this,view(elt3)}</DisplayString>
-    <DisplayString IncludeView ="elt3" Condition="Arguments.Size == 3"></DisplayString>
-    <DisplayString IncludeView ="elt3">, {((TemplateArgumentLoc*)Arguments.BeginX)[3],view(cpp)}{*this,view(elt4)}</DisplayString>
-    <DisplayString IncludeView ="elt4" Condition="Arguments.Size == 4"></DisplayString>
-    <DisplayString IncludeView ="elt4">, ...</DisplayString>
-    <DisplayString Condition="Arguments.Size == 0">empty</DisplayString>
-    <DisplayString Condition="Arguments.Size != 0">&lt;{*this,view(elt0)}&gt;</DisplayString>
-    <DisplayString>Uninitialized</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateArgumentList">
-    <DisplayString IncludeView="arg0" Condition="NumArguments==0"></DisplayString>
-    <DisplayString IncludeView="arg0">{Arguments[0],view(cpp)}{*this,view(arg1)}</DisplayString>
-    <DisplayString IncludeView="arg1" Condition="NumArguments==1"></DisplayString>
-    <DisplayString IncludeView="arg1">, {Arguments[1],view(cpp)}{*this,view(arg2)}</DisplayString>
-    <DisplayString IncludeView="arg2" Condition="NumArguments==2"></DisplayString>
-    <DisplayString IncludeView="arg2">, {Arguments[1],view(cpp)}, ...</DisplayString>
-    <DisplayString>&lt;{*this,view(arg0)}&gt;</DisplayString>
-    <Expand>
-      <Item Name="NumArguments">NumArguments</Item>
-      <ArrayItems>
-        <Size>NumArguments</Size>
-        <ValuePointer>Arguments</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-  <Type Name="llvm::ArrayRef&lt;clang::TemplateArgument&gt;">
-    <DisplayString IncludeView="arg0" Condition="Length==0"></DisplayString>
-    <DisplayString IncludeView="arg0">{Data[0],view(cpp)}{*this,view(arg1)}</DisplayString>
-    <DisplayString IncludeView="arg1" Condition="Length==1"></DisplayString>
-    <DisplayString IncludeView="arg1">, {Data[1],view(cpp)}{*this,view(arg2)}</DisplayString>
-    <DisplayString IncludeView="arg2" Condition="Length==2"></DisplayString>
-    <DisplayString IncludeView="arg2">, {Data[2],view(cpp)}, ...</DisplayString>
-    <DisplayString>&lt;{*this,view(arg0)}&gt;</DisplayString>
-    <Expand>
-      <Item Name="Length">Length</Item>
-      <Synthetic Name="Data">
-        <Expand>
-          <ArrayItems>
-            <Size>Length</Size>
-            <ValuePointer>Data</ValuePointer>
-          </ArrayItems>
-        </Expand>
-      </Synthetic>
-    </Expand>
-  </Type>
-  <Type Name="clang::MultiLevelTemplateArgumentList">
-    <DisplayString IncludeView="level0" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==0"></DisplayString>
-    <DisplayString IncludeView="level0">{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[0],view(cpp)}{*this,view(level1)}</DisplayString>
-    <DisplayString IncludeView="level1" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==1"></DisplayString>
-    <DisplayString IncludeView="level1">::{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[1],view(cpp)}{*this,view(level2)}</DisplayString>
-    <DisplayString IncludeView="level2" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==2"></DisplayString>
-    <DisplayString IncludeView="level2">::{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[2],view(cpp)}, ...</DisplayString>
-    <DisplayString>{*this,view(level0)}</DisplayString>
-    <Expand>
-      <Item Name="TemplateList">TemplateArgumentLists</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::ParsedTemplateArgument">
-    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Type" IncludeView="cpp">{(clang::QualType *)Arg,view(cpp)na}</DisplayString>
-    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Type">Type template argument: {*(clang::QualType *)Arg}</DisplayString>
-    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::NonType">Non-type template argument: {*(clang::Expr *)Arg}</DisplayString>
-    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Template">Template template argument: {*(clang::TemplateName *)Arg</DisplayString>
-    <Expand>
-      <Item Name="Kind">Kind,en</Item>
-      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::Type">(clang::QualType *)Arg</Item>
-      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::NonType">(clang::Expr *)Arg</Item>
-      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::Template">(clang::TemplateName *)Arg</Item>
-    </Expand>
-  </Type>
-  <!-- Builtin types that have C++ keywords are manually displayed as that keyword. Otherwise, just use the enum name -->
-  <Type Name="clang::BuiltinType">
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Void">void</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Bool">bool</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char_U">char</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UChar">unsigned char</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::WChar_U">wchar_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char16">char16_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char32">char32_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UShort">unsigned short</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UInt">unsigned int</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::ULong">unsigned long</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::ULongLong">unsigned long long</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UInt128">__uint128_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char_S">char</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::SChar">signed char</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::WChar_S">wchar_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Short">short</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Int">int</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Long">long</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::LongLong">long long</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Int128">__int128_t</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Half">__fp16</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Float">float</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Double">double</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::LongDouble">long double</DisplayString>
-    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::NullPtr">nullptr_t</DisplayString>
-    <DisplayString>{(clang::BuiltinType::Kind)BuiltinTypeBits.Kind, en}</DisplayString>
-    <Expand>
-      <Item Name="Kind">(clang::BuiltinType::Kind)BuiltinTypeBits.Kind</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="clang::TemplateSpecializationType">
-    <DisplayString IncludeView="arg0" Condition="TemplateSpecializationTypeBits.NumArgs==0"></DisplayString>
-    <DisplayString IncludeView="arg0">{((clang::TemplateArgument *)(this+1))[0],view(cpp)}{*this,view(arg1)}</DisplayString>
-    <DisplayString IncludeView="arg1" Condition="TemplateSpecializationTypeBits.NumArgs==1"></DisplayString>
-    <DisplayString IncludeView="arg1">, {((clang::TemplateArgument *)(this+1))[1],view(cpp)}{*this,view(arg2)}</DisplayString>
-    <DisplayString IncludeView="arg2" Condition="TemplateSpecializationTypeBits.NumArgs==2"></DisplayString>
-    <DisplayString IncludeView="arg2">, {((clang::TemplateArgument *)(this+1))[2],view(cpp)}{*this,view(arg3)}</DisplayString>
-    <DisplayString Condition="(Template.Storage.Val.Value &amp; 3) == 0">
-      {*((clang::TemplateDecl *)(Template.Storage.Val.Value))->TemplatedDecl,view(cpp)}&lt;{*this,view(arg0)}&gt;
-    </DisplayString>
-    <DisplayString>Can't visualize this TemplateSpecializationType</DisplayString>
-    <Expand>
-      <Item Name="Template">Template.Storage</Item>
-      <ArrayItems>
-        <Size>TemplateSpecializationTypeBits.NumArgs</Size>
-        <ValuePointer>(clang::TemplateArgument *)(this+1)</ValuePointer>
-      </ArrayItems>
-      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeducedType">
-    <Expand>
-      <Item Name="isDeduced">(CanonicalType.Value.Value != this) || TypeBits.Dependent</Item>
-      <ExpandedItem>*(clang::Type *)this,view(cmn)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeducedTemplateSpecializationType">
-    <DisplayString Condition="(CanonicalType.Value.Value != this) || TypeBits.Dependent">{CanonicalType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="cpp">{Template,view(cpp)}</DisplayString>
-    <DisplayString>{Template}</DisplayString>
-    <Expand>
-      <Item Name="Template">Template</Item>
-      <Item Name="Deduced As" Condition="(CanonicalType.Value.Value != this) || TypeBits.Dependent">CanonicalType,view(cpp)</Item>
-      <ExpandedItem>(clang::DeducedType *)this</ExpandedItem>
-      <Item Name="Template">Template</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::ClassTemplateSpecializationDecl">
-    <DisplayString>{*(CXXRecordDecl *)this,nd}{*TemplateArgs}</DisplayString>
-    <Expand>
-      <ExpandedItem>(CXXRecordDecl *)this,nd</ExpandedItem>
-      <Item Name="TemplateArgs">TemplateArgs</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::IdentifierInfo">
-    <DisplayString Condition="Entry != 0">{((llvm::StringMapEntry&lt;clang::IdentifierInfo *&gt;*)Entry)+1,sb}</DisplayString>
-    <Expand>
-      <Item Condition="Entry != 0" Name="[Identifier]">((llvm::StringMapEntry&lt;clang::IdentifierInfo *&gt;*)Entry)+1,s</Item>
-      <Item Name="Token Kind">(clang::tok::TokenKind)TokenID</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeclarationName">
-    <DisplayString Condition="Ptr == 0" IncludeView="cpp"></DisplayString>
-    <DisplayString Condition="Ptr == 0">Empty</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredIdentifier" IncludeView="cpp">{*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredIdentifier">{{Identifier ({*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredObjCZeroArgSelector">{{ObjC Zero Arg Selector (*{(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredObjCOneArgSelector">{{ObjC One Arg Selector (*{(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName" IncludeView="cpp">{(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)na}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName">C++ Constructor {{{(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)na}}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXDestructorName">C++ Destructor {{*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask)}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConversionFunctionName">C++ Conversion function {{*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask)}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXOperatorName">C++ Operator {{*(clang::detail::CXXOperatorIdName *)(Ptr &amp; ~PtrMask)}}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra"
-                   IncludeView="cpp">{*(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)}</DisplayString>
-    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra">{{Extra ({*(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
-    <Expand>
-      <Item Name="Kind">StoredNameKind(Ptr &amp; PtrMask),en</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredIdentifier" Name="[Identifier]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredObjCZeroArgSelector" Name="[ObjC Zero Arg Selector]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredObjCOneArgSelector" Name="[ObjC One Arg Selector]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName" Name="[C++ Constructor]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXDestructorName" Name="[C++ Destructor]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXConversionFunctionName" Name="[C++ Conversion function]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXOperatorName" Name="[C++ Operator]">*(clang::detail::CXXOperatorIdName *)(Ptr &amp; ~PtrMask),na</Item>
-      <Item Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra" Name="[Extra]">(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::detail::DeclarationNameExtra">
-    <DisplayString Condition="ExtraKindOrNumArgs == CXXDeductionGuideName" IncludeView="cpp">
-      {(CXXDeductionGuideNameExtra *)this,view(cpp)nand}
-    </DisplayString>
-    <DisplayString Condition="ExtraKindOrNumArgs == CXXDeductionGuideName">
-      {(CXXDeductionGuideNameExtra *)this,nand}
-    </DisplayString>
-    <DisplayString Condition="ExtraKindOrNumArgs == CXXLiteralOperatorName">C++ Literal operator</DisplayString>
-    <DisplayString Condition="ExtraKindOrNumArgs == CXXUsingDirective">C++ Using directive</DisplayString>
-    <DisplayString Condition="ExtraKindOrNumArgs == ObjCMultiArgSelector">Objective-C MultiArg selector</DisplayString>
-    <DisplayString>{(clang::detail::DeclarationNameExtra::ExtraKind)ExtraKindOrNumArgs,en}{"  ",sb}{*this,view(cpp)}</DisplayString>
-    <Expand>
-      <ExpandedItem Condition="ExtraKindOrNumArgs == CXXDeductionGuideName">(CXXDeductionGuideNameExtra *)this</ExpandedItem>
-      <Item Name="ExtraKindOrNumArgs" Condition="ExtraKindOrNumArgs != CXXDeductionGuideName">ExtraKindOrNumArgs</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::detail::CXXDeductionGuideNameExtra">
-    <DisplayString IncludeView="cpp">{Template->TemplatedDecl,view(cpp)}</DisplayString>
-    <DisplayString>C++ Deduction guide for {Template->TemplatedDecl,view(cpp)na}</DisplayString>
-  </Type>
-  <Type Name="clang::detail::CXXSpecialNameExtra">
-    <DisplayString IncludeView="cpp">{Type,view(cpp)}</DisplayString>
-    <DisplayString>{Type}</DisplayString>
-  </Type>
-  <Type Name="clang::DeclarationNameInfo">
-    <DisplayString>{Name}</DisplayString>
-  </Type>
-  <Type Name="clang::TemplateIdAnnotation">
-    <DisplayString IncludeView="arg0" Condition="NumArgs==0"></DisplayString>
-    <DisplayString IncludeView="arg0">{(ParsedTemplateArgument *)(this+1),view(cpp)na}{this,view(arg1)na}</DisplayString>
-    <DisplayString IncludeView="arg1" Condition="NumArgs==1"></DisplayString>
-    <DisplayString IncludeView="arg1">, {((ParsedTemplateArgument *)(this+1))+1,view(cpp)na}{this,view(arg2)na}</DisplayString>
-    <DisplayString IncludeView="arg2" Condition="NumArgs==2"></DisplayString>
-    <DisplayString IncludeView="arg1">, ...</DisplayString>
-    <DisplayString>{Name,na}&lt;{this,view(arg0)na}&gt;</DisplayString>
-    <Expand>
-      <Item Name="Name">Name</Item>
-      <Synthetic Name="Arguments">
-        <DisplayString>{this,view(arg0)na}</DisplayString>
-        <Expand>
-          <ArrayItems>
-            <Size>NumArgs</Size>
-            <ValuePointer>(ParsedTemplateArgument *)(this+1)</ValuePointer>
-          </ArrayItems>
-        </Expand>
-      </Synthetic>
-      <Item Name="Operator">Operator</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::Token">
-    <DisplayString Condition="Kind == clang::tok::annot_template_id">{{annot_template_id ({(clang::TemplateIdAnnotation *)(PtrData),na})}}</DisplayString>
-    <DisplayString Condition="Kind == clang::tok::identifier">{{Identifier ({(clang::IdentifierInfo *)(PtrData),na})}}</DisplayString>
-    <DisplayString>{(clang::tok::TokenKind)Kind,en}</DisplayString>
-  </Type>
-  <Type Name="clang::Lexer">
-    <DisplayString>{BufferPtr,nasb}</DisplayString>
-  </Type>
-  <Type Name="clang::Preprocessor::IncludeStackInfo">
-    <DisplayString Condition="TheLexer._Mypair._Myval2 != 0">{TheLexer._Mypair._Myval2,na}</DisplayString>
-    <DisplayString Condition="TheTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {TheTokenLexer._Mypair._Myval2,na}</DisplayString>
-    <DisplayString></DisplayString>
-  </Type>
-  <Type Name="clang::Preprocessor">
-    <DisplayString IncludeView="cached" Condition="CachedLexPos &lt; CachedTokens.Size">
-      [{(Token *)(CachedTokens.BeginX) + CachedLexPos,na}] {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}
-    </DisplayString>
-    <DisplayString IncludeView="cached"> {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}</DisplayString>
-    <DisplayString Condition="CurLexer._Mypair._Myval2 != 0">{CurLexer._Mypair._Myval2,na}</DisplayString>
-    <DisplayString Condition="CurTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {CurTokenLexer._Mypair._Myval2,na}</DisplayString>
-    <!-- Can't use CurLexerCallback because natvis sees the type rather than the variable -->
-    <DisplayString Condition="IncludeMacroStack._Mypair._Myval2._Mylast - IncludeMacroStack._Mypair._Myval2._Myfirst">
-      {this,view(cached)}
-    </DisplayString>
-    <DisplayString>CLK_LexAfterModuleImport</DisplayString>
-  </Type>
-  <Type Name="clang::Parser">
-    <DisplayString>[{Tok}] {PP,na}</DisplayString>
-  </Type>
-  <Type Name="clang::LambdaIntroducer::LambdaCapture">
-    <DisplayString Condition="Kind == LCK_This">this</DisplayString>
-    <DisplayString Condition="Kind == LCK_StarThis">*this</DisplayString>
-    <DisplayString Condition="Kind == LCK_ByCopy">{Id}</DisplayString>
-    <DisplayString Condition="Kind == LCK_ByRef">&amp;{Id}</DisplayString>
-    <DisplayString>No visualizer for {Kind}</DisplayString>
-  </Type>
-  <Type Name="clang::LambdaIntroducer">
-    <DisplayString IncludeView="default" Condition="Default==LCD_None"></DisplayString>
-    <DisplayString IncludeView="default" Condition="Default==LCD_ByCopy">=,</DisplayString>
-    <DisplayString IncludeView="default" Condition="Default==LCD_ByRef">&amp;,</DisplayString>
-    <DisplayString IncludeView="capture0" Condition="Captures.Size==0"></DisplayString>
-    <DisplayString IncludeView="capture0">{(LambdaCapture *)(Captures.BeginX),na}{this,view(capture1)na}</DisplayString>
-    <DisplayString IncludeView="capture1" Condition="Captures.Size==1"></DisplayString>
-    <DisplayString IncludeView="capture1">,{(LambdaCapture *)(Captures.BeginX)+1,na}{this,view(capture2)na}</DisplayString>
-    <DisplayString IncludeView="capture2" Condition="Captures.Size==2"></DisplayString>
-    <DisplayString IncludeView="capture2">,{(LambdaCapture *)(Captures.BeginX)+2,na}{this,view(capture3)na}</DisplayString>
-    <DisplayString IncludeView="capture3" Condition="Captures.Size==3"></DisplayString>
-    <DisplayString IncludeView="capture3">,...</DisplayString>
-    <DisplayString>[{this,view(default)na}{this,view(capture0)na}]</DisplayString>
-  </Type>
-  <Type Name="clang::DeclSpec">
-    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_typename || TypeSpecType == TST_typeofType || TypeSpecType == TST_underlying_type || TypeSpecType == TST_atomic">
-      , [{TypeRep}]
-    </DisplayString>
-    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_typeofExpr || TypeSpecType == TST_decltype">
-      , [{ExprRep}]
-    </DisplayString>
-    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_enum || TypeSpecType == TST_struct || TypeSpecType == TST_interface || TypeSpecType == TST_union || TypeSpecType == TST_class">
-      , [{DeclRep}]
-    </DisplayString>
-    <DisplayString IncludeView="extra"></DisplayString>
-    <DisplayString>[{(clang::DeclSpec::SCS)StorageClassSpec,en}], [{(clang::TypeSpecifierType)TypeSpecType,en}]{this,view(extra)na}</DisplayString>
-    <Expand>
-      <Item Name="StorageClassSpec">(clang::DeclSpec::SCS)StorageClassSpec</Item>
-      <Item Name="TypeSpecType">(clang::TypeSpecifierType)TypeSpecType</Item>
-      <Item Name="TypeRep" Condition="TypeSpecType == TST_typename || TypeSpecType == TST_typeofType || TypeSpecType == TST_underlying_type || TypeSpecType == TST_atomic">
-        TypeRep
-      </Item>
-      <Item Name="ExprRep" Condition="TypeSpecType == TST_typeofExpr || TypeSpecType == TST_decltype">
-        ExprRep
-      </Item>
-      <Item Name="DeclRep" Condition="TypeSpecType == TST_enum || TypeSpecType == TST_struct || TypeSpecType == TST_interface || TypeSpecType == TST_union || TypeSpecType == TST_class">
-        DeclRep
-      </Item>
-
-    </Expand>
-  </Type>
-  <Type Name="clang::PragmaHandler">
-    <DisplayString>{Name,s}</DisplayString>
-  </Type>
-  <Type Name="clang::FileEntry">
-    <DisplayString>{RealPathName,s}</DisplayString>
-  </Type>
-  <Type Name="clang::DirectoryEntry">
-    <DisplayString>{Name,s}</DisplayString>
-  </Type>
-  <Type Name="clang::VarDecl::VarDeclBitfields">
-    <Expand>
-      <Item Name="StorageClass">(clang::StorageClass)SClass</Item>
-      <Item Name="ThreadStorageClass">(clang::ThreadStorageClassSpecifier)TSCSpec</Item>
-      <Item Name="InitStyle">(clang::VarDecl::InitializationStyle)InitStyle</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeclaratorDecl">
-    <DisplayString>{DeclType,view(left)} {Name,view(cpp)}{DeclType,view(right)}</DisplayString>
-    <Expand>
-      <Item Name="Name">Name</Item>
-      <Item Name="DeclType">DeclType</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::VarDecl">
-    <DisplayString>{(DeclaratorDecl*)this,nand}</DisplayString>
-    <Expand>
-      <ExpandedItem>(DeclaratorDecl*)this,nd</ExpandedItem>
-      <Item Name="Init">Init</Item>
-      <Item Name="VarDeclBits">VarDeclBits</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::ParmVarDecl">
-    <DisplayString>{*(VarDecl*)this,nd}</DisplayString>
-    <Expand>
-      <Item Name="ParmVarDeclBits">ParmVarDeclBits</Item>
-      <ExpandedItem>*(VarDecl*)this,nd</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::ExplicitSpecifier">
-    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::ResolvedTrue" IncludeView="cpp">{"explicit ",sb}</DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::ResolvedFalse" IncludeView="cpp"></DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::Unresolved" IncludeView="cpp">explicit({ExplicitSpec,view(ptr)na})</DisplayString>
-    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data)&amp;~7) == 0">{ExplicitSpec,view(int)en}</DisplayString>
-    <DisplayString>{ExplicitSpec,view(int)en} : {ExplicitSpec,view(ptr)na}</DisplayString>
-  </Type>
-  <Type Name="clang::CXXDeductionGuideDecl">
-    <DisplayString>{ExplicitSpec,view(cpp)}{Name,view(cpp)nd}({(FunctionDecl*)this,view(parm0)nand}) -&gt; {((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)(((uintptr_t)DeclType.Value.Value) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}</DisplayString>
-    <Expand>
-      <Item Name="ExplicitSpec">ExplicitSpec</Item>
-      <Item Name="IsCopyDeductionCandidate">(bool)FunctionDeclBits.IsCopyDeductionCandidate</Item>
-      <ExpandedItem>(FunctionDecl*)this,nd</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::FunctionDecl">
-    <DisplayString IncludeView="retType">{((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}</DisplayString>
-    <DisplayString IncludeView="parm0" Condition="0 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm0">{ParamInfo[0],na}{*this,view(parm1)nd}</DisplayString>
-    <DisplayString IncludeView="parm1" Condition="1 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm1">, {ParamInfo[1],na}{*this,view(parm2)nd}</DisplayString>
-    <DisplayString IncludeView="parm2" Condition="2 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm2">, {ParamInfo[2],na}{*this,view(parm3)nd}</DisplayString>
-    <DisplayString IncludeView="parm3" Condition="3 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm3">, {ParamInfo[3],na}{*this,view(parm4)nd}</DisplayString>
-    <DisplayString IncludeView="parm4" Condition="4 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm4">, {ParamInfo[4],na}{*this,view(parm5)nd}</DisplayString>
-    <DisplayString IncludeView="parm5" Condition="5 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
-    <DisplayString IncludeView="parm5">, /* expand for more params */</DisplayString>
-    <DisplayString Condition="((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.HasTrailingReturn">
-      auto {Name,view(cpp)nd}({*this,view(parm0)nd}) -&gt; {((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}
-    </DisplayString>
-    <DisplayString>{this,view(retType)nand} {Name,view(cpp)nd}({*this,view(parm0)nd})</DisplayString>
-    <Expand>
-      <ExpandedItem>(clang::DeclaratorDecl *)this,nd</ExpandedItem>
-      <Item Name="ReturnType">((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType</Item>
-      <Synthetic Name="Parameter Types">
-        <DisplayString>{*this,view(parm0)nd}</DisplayString>
-        <Expand>
-          <ArrayItems>
-            <Size>((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams</Size>
-            <ValuePointer>ParamInfo</ValuePointer>
-          </ArrayItems>
-        </Expand>
-      </Synthetic>
-      <Item Name="TemplateOrSpecialization">TemplateOrSpecialization</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::OpaquePtr&lt;*&gt;">
-    <DisplayString>{*($T1*)&amp;Ptr}</DisplayString>
-    <Expand>
-      <ExpandedItem>($T1*)&amp;Ptr</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::UnionOpaquePtr&lt;*&gt;">
-    <DisplayString>{($T1 *)Ptr}</DisplayString>
-    <Expand>
-      <ExpandedItem>($T1 *)Ptr</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::TemplateParameterList">
-    <DisplayString IncludeView="parm0" Condition="NumParams==0"></DisplayString>
-    <DisplayString IncludeView="parm0">{*((NamedDecl **)(this+1))[0],view(cpp)}{*this,view(parm1)}</DisplayString>
-    <DisplayString IncludeView="parm1" Condition="NumParams==1"></DisplayString>
-    <DisplayString IncludeView="parm1">, {*((NamedDecl **)(this+1))[1],view(cpp)}{*this,view(parm2)}</DisplayString>
-    <DisplayString IncludeView="parm2" Condition="NumParams==2"></DisplayString>
-    <DisplayString IncludeView="parm2">, {*((NamedDecl **)(this+1))[2],view(cpp)}{*this,view(parm3)}</DisplayString>
-    <DisplayString IncludeView="parm3" Condition="NumParams==3"></DisplayString>
-    <DisplayString IncludeView="parm3">, {*((NamedDecl **)(this+1))[3],view(cpp)}{*this,view(parm4)}</DisplayString>
-    <DisplayString IncludeView="parm4" Condition="NumParams==4"></DisplayString>
-    <DisplayString IncludeView="parm4">, {*((NamedDecl **)(this+1))[4],view(cpp)}{*this,view(parm5)}</DisplayString>
-    <DisplayString IncludeView="parm5" Condition="NumParams==5"></DisplayString>
-    <DisplayString IncludeView="parm5">, /* Expand for more params */</DisplayString>
-    <DisplayString>&lt;{*this,view(parm0)}&gt;</DisplayString>
-    <Expand>
-      <ArrayItems>
-        <Size>NumParams</Size>
-      <ValuePointer>(NamedDecl **)(this+1)</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-  <Type Name="clang::Stmt">
-    <DisplayString>{(clang::Stmt::StmtClass)StmtBits.sClass,en}</DisplayString>
-    <Expand>
-      <Item Name="Class">(clang::Stmt::StmtClass)StmtBits.sClass,en</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::Expr">
-    <DisplayString Condition="StmtBits.sClass==clang::Stmt::StmtClass::StringLiteralClass" IncludeView="poly">{*(clang::StringLiteral *)this}</DisplayString>
-    <DisplayString>Expression of class {(clang::Stmt::StmtClass)StmtBits.sClass,en} and type {TR,view(cpp)}</DisplayString>
-  </Type>
-  <Type Name="clang::StringLiteral">
-    <Expand>
-      <Item Name="Length">*(unsigned *)(((clang::StringLiteral *)this)+1)</Item>
-      <Item Name="Data" Condition="StringLiteralBits.NumConcatenated==1">(const char *)(((clang::StringLiteral *)this)+1)+4+4,[*(unsigned *)(((clang::StringLiteral *)this)+1)]s8</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeclAccessPair">
-    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_public">public</DisplayString>
-    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_protected">protected</DisplayString>
-    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_private">private</DisplayString>
-    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_none"></DisplayString>
-    <DisplayString IncludeView="decl">{*(clang::NamedDecl *)(Ptr&amp;~Mask)}</DisplayString>
-    <DisplayString>{*this,view(access)} {*this,view(decl)}</DisplayString>
-    <Expand>
-      <Item Name="access">(clang::AccessSpecifier)(Ptr&amp;Mask),en</Item>
-      <Item Name="decl">*(clang::NamedDecl *)(Ptr&amp;~Mask)</Item>
-    </Expand>
-  </Type>
-  <Type Name="clang::UnqualifiedId">
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_Identifier">[IK_Identifier] {*Identifier}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_OperatorFunctionId">[IK_OperatorFunctionId] {OperatorFunctionId}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConversionFunctionId">[IK_ConversionFunctionId] {ConversionFunctionId}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConstructorName">[IK_ConstructorName] {ConstructorName}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_DestructorName">[IK_DestructorName] {DestructorName}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_DeductionGuideName">[IK_DeductionGuideName] {TemplateName}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_TemplateId">[IK_TemplateId] {TemplateId}</DisplayString>
-    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConstructorTemplateId">[IK_ConstructorTemplateId] {TemplateId}</DisplayString>
-    <DisplayString>Kind</DisplayString>
-    <Expand>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_Identifier">Identifier</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_OperatorFunctionId">OperatorFunctionId</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConversionFunctionId">ConversionFunctionId</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConstructorName">ConstructorName</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_DestructorName">DestructorName</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_DeductionGuideName">TemplateName</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_TemplateId">TemplateId</ExpandedItem>
-      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConstructorTemplateId">TemplateId</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeclGroup">
-    <DisplayString>NumDecls={NumDecls}</DisplayString>
-    <Expand>
-      <ArrayItems>
-        <Size>NumDecls</Size>
-        <ValuePointer>(Decl **)(this+1)</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-  <Type Name="clang::DeclGroupRef">
-    <DisplayString Condition="(Kind)((uintptr_t)D&amp;1)==SingleDeclKind">{*D}</DisplayString>
-    <DisplayString>{*(DeclGroup *)((uintptr_t)D&amp;~1)}</DisplayString>
-    <Expand>
-      <ExpandedItem Condition="(Kind)((uintptr_t)D&amp;1)==SingleDeclKind">D</ExpandedItem>
-      <ExpandedItem Condition="(Kind)((uintptr_t)D&amp;1)==DeclGroupKind">(DeclGroup *)((uintptr_t)D&amp;~1)</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::Declarator">
-    <DisplayString>{DS} {Name}</DisplayString>
-  </Type>
-  <Type Name="clang::UnresolvedSet&lt;*&gt;">
-    <DisplayString>{Decls}</DisplayString>
-    <Expand>
-      <ExpandedItem>Decls</ExpandedItem>
-    </Expand>
-  </Type>
-  <Type Name="clang::LookupResult">
-    <DisplayString Condition="ResultKind == clang::LookupResult::Ambiguous">{Ambiguity,en}: {Decls}</DisplayString>
-    <DisplayString>{ResultKind,en}: {Decls}</DisplayString>
-  </Type>
-  <Type Name="clang::ActionResult&lt;*, 0&gt;">
-    <DisplayString Condition="Invalid">Invalid</DisplayString>
-    <DisplayString Condition="!*(void **)&amp;Val">Unset</DisplayString>
-    <DisplayString>{Val}</DisplayString>
-  </Type>
-  <Type Name="clang::ActionResult&lt;*, 1&gt;">
-    <DisplayString Condition="Value&amp;1">Invalid</DisplayString>
-    <DisplayString Condition="Value==0">Unset</DisplayString>
-    <DisplayString>{($T1)(Value&amp;~1)}</DisplayString>
-    <Expand>
-      <Item Name="Invalid">(bool)(Value&amp;1)</Item>
-      <Item Name="Val">($T1)(Value&amp;~1)</Item>
-    </Expand>
-  </Type>
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+Visual Studio Native Debugging Visualizers for LLVM
+
+For Visual Studio 2013 only, put this file into
+"%USERPROFILE%\Documents\Visual Studio 2013\Visualizers" or create a symbolic link so it updates automatically.
+
+For later versions of Visual Studio, no setup is required-->
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+  <Type Name="clang::Type">
+    <!-- To visualize clang::Types, we need to look at TypeBits.TC to determine the actual
+         type subclass and manually dispatch accordingly (Visual Studio can't identify the real type
+         because clang::Type has no virtual members hence no RTTI).
+
+         Views:
+           "cmn": Visualization that is common to all clang::Type subclasses
+           "poly": Visualization that is specific to the actual clang::Type subclass. The subtype-specific
+                   <DisplayString> is typically as C++-like as possible (like in dump()) with <Expand>
+                   containing all the gory details.
+           "cpp": Only occasionally used when we need to distinguish between an ordinary view and a C++-like view.
+    -->
+    <DisplayString IncludeView="cmn" Condition="TypeBits.TC==clang::LocInfoType::LocInfo">LocInfoType</DisplayString>
+    <DisplayString IncludeView="cmn">{(clang::Type::TypeClass)TypeBits.TC, en}Type</DisplayString>
+    <!-- Dispatch to visualizers for the actual Type subclass -->
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Builtin" IncludeView="poly">{*(clang::BuiltinType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Pointer" IncludeView="poly">{*(clang::PointerType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Paren" IncludeView="poly">{*(clang::ParenType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::BitInt" IncludeView="poly">{(clang::BitIntType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::LValueReference" IncludeView="poly">{*(clang::LValueReferenceType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::RValueReference" IncludeView="poly">{*(clang::RValueReferenceType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="poly">{(clang::ConstantArrayType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="left">{(clang::ConstantArrayType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray" IncludeView="right">{(clang::ConstantArrayType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="poly">{(clang::VariableArrayType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="left">{(clang::VariableArrayType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray" IncludeView="right">{(clang::VariableArrayType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="poly">{(clang::IncompleteArrayType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="left">{(clang::IncompleteArrayType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray" IncludeView="right">{(clang::IncompleteArrayType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Typedef" IncludeView="poly">{(clang::TypedefType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Typedef" IncludeView="cpp">{(clang::TypedefType *)this,view(cpp)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Attributed" IncludeView="poly">{*(clang::AttributedType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="poly">{(clang::DecayedType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="left">{(clang::DecayedType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Decayed" IncludeView="right">{(clang::DecayedType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="poly">{(clang::ElaboratedType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="left">{(clang::ElaboratedType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated" IncludeView="right">{(clang::ElaboratedType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm" IncludeView="poly">{*(clang::TemplateTypeParmType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm" IncludeView="cpp">{*(clang::TemplateTypeParmType *)this,view(cpp)}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::SubstTemplateTypeParm" IncludeView="poly">{*(clang::SubstTemplateTypeParmType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Record" IncludeView="poly">{*(clang::RecordType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::Record" IncludeView="cpp">{*(clang::RecordType *)this,view(cpp)}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="poly">{(clang::FunctionProtoType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="left">{(clang::FunctionProtoType *)this,view(left)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto" IncludeView="right">{(clang::FunctionProtoType *)this,view(right)na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::TemplateSpecialization" IncludeView="poly">{*(clang::TemplateSpecializationType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization" IncludeView="poly">{*(clang::DeducedTemplateSpecializationType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization" IncludeView="cpp">{*(clang::DeducedTemplateSpecializationType *)this,view(cpp)}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::InjectedClassName" IncludeView="poly">{*(clang::InjectedClassNameType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::DependentName" IncludeView="poly">{*(clang::DependentNameType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::Type::TypeClass::PackExpansion" IncludeView="poly">{*(clang::PackExpansionType *)this}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::LocInfoType::LocInfo" IncludeView="poly">{(clang::LocInfoType *)this,na}</DisplayString>
+    <DisplayString Condition="TypeBits.TC==clang::LocInfoType::LocInfo" IncludeView="cpp">{(clang::LocInfoType *)this,view(cpp)na}</DisplayString>
+    <DisplayString IncludeView="cpp">{this,view(poly)na}</DisplayString>
+    <DisplayString IncludeView="left">{*this,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="right"></DisplayString>
+    <DisplayString IncludeView="poly">No visualizer yet for {(clang::Type::TypeClass)TypeBits.TC,en}Type</DisplayString> <!-- Not yet implemented Type subclass -->
+    <DisplayString IncludeView="Dependence" Condition="TypeBits.Dependence">Dependence{" ",en}</DisplayString>
+    <DisplayString IncludeView="Dependence"></DisplayString>
+    <DisplayString IncludeView="Cache" Condition="TypeBits.CacheValid &amp;&amp; TypeBits.CachedLocalOrUnnamed">CachedLinkage: {(clang::Linkage)TypeBits.CachedLinkage,en} CachedLocalOrUnnamed</DisplayString>
+    <DisplayString IncludeView="Cache" Condition="TypeBits.CacheValid &amp;&amp; !TypeBits.CachedLocalOrUnnamed">CachedLinkage: {(clang::Linkage)TypeBits.CachedLinkage,en}{" ",sb}</DisplayString>
+    <DisplayString IncludeView="Cache"></DisplayString>
+    <DisplayString IncludeView="FromAST" Condition="TypeBits.FromAST">FromAST</DisplayString>
+    <DisplayString IncludeView="FromAST"></DisplayString>
+    <DisplayString IncludeView="flags" Condition="!TypeBits.Dependence &amp;&amp; !TypeBits.CacheValid &amp;&amp; !TypeBits.FromAST">
+      No TypeBits set beyond TypeClass
+    </DisplayString>
+    <DisplayString IncludeView="flags">{*this, view(Dependence)}{*this, view(Cache)}{*this, view(FromAST)}</DisplayString>
+    <DisplayString>{*this,view(cmn)}  {{{*this,view(poly)}}}</DisplayString>
+    <Expand>
+      <Item Name="TypeClass" IncludeView="cmn">(clang::Type::TypeClass)TypeBits.TC</Item>
+      <Item Name="Flags" IncludeView="cmn">this,view(flags)na</Item>
+      <Item Name="Canonical" IncludeView="cmn">CanonicalType</Item>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Builtin">*(clang::BuiltinType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Pointer">*(clang::PointerType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Paren">*(clang::ParenType*)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::BitInt">*(clang::BitIntType*)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::LValueReference">*(clang::LValueReferenceType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::RValueReference">*(clang::RValueReferenceType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::ConstantArray">(clang::ConstantArrayType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::VariableArray">(clang::VariableArrayType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::IncompleteArray">(clang::IncompleteArrayType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Attributed">*(clang::AttributedType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Decayed">(clang::DecayedType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Elaborated">(clang::ElaboratedType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::TemplateTypeParm">(clang::TemplateTypeParmType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::SubstTemplateTypeParm">(clang::SubstTemplateTypeParmType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::Record">(clang::RecordType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::FunctionProto">(clang::FunctionProtoType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::TemplateSpecialization">(clang::TemplateSpecializationType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::DeducedTemplateSpecialization">(clang::DeducedTemplateSpecializationType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::InjectedClassName">(clang::InjectedClassNameType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::DependentName">(clang::DependentNameType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::Type::TypeClass::PackExpansion">(clang::PackExpansionType *)this</ExpandedItem>
+      <ExpandedItem ExcludeView="cmn" Condition="TypeBits.TC==clang::LocInfoType::LocInfo">(clang::LocInfoType *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::ArrayType">
+    <Expand>
+      <Item Name="ElementType">ElementType</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::ConstantArrayType">
+    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="right">[{Size}]</DisplayString>
+    <DisplayString>{ElementType,view(cpp)}[{Size}]</DisplayString>
+    <Expand>
+      <Item Name="Size">Size</Item>
+      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::IncompleteArrayType">
+    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="right">[]</DisplayString>
+    <DisplayString>{ElementType,view(cpp)}[]</DisplayString>
+    <Expand>
+      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::VariableArrayType">
+    <DisplayString IncludeView="left">{ElementType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="right">[*]</DisplayString>
+    <DisplayString>{ElementType,view(cpp)}[*]</DisplayString>
+    <Expand>
+      <Item Name="[Size Expression]">(clang::Expr *)SizeExpr</Item>
+      <ExpandedItem>(clang::ArrayType *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TypedefType">
+    <DisplayString IncludeView="cpp">{Decl,view(name)nd}</DisplayString>
+    <DisplayString>{Decl}</DisplayString>
+    <Expand>
+      <Item Name="Decl">Decl</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::PointerType">
+    <DisplayString>{PointeeType, view(cpp)} *</DisplayString>
+    <Expand>
+      <Item Name="PointeeType">PointeeType</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::ParenType">
+    <DisplayString>{Inner, view(cpp)}</DisplayString>
+    <Expand>
+      <Item Name="Inner">Inner</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::BitIntType">
+    <DisplayString Condition="!IsUnsigned">signed _BitInt({NumBits})</DisplayString>
+    <DisplayString Condition="!IsUnsigned">unsigned _BitInt({NumBits})(</DisplayString>
+    <Expand>
+      <Item Name="NumBits">NumBits</Item>
+      <ExpandedItem>(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <!-- We visualize all inner types for clang reference types. So a rvalue reference to an lvalue reference
+       to an int  would visual as int &amp; &amp;&amp; This is a little different than GetPointeeType(),
+       but more clearly displays the data structure and seems natural -->
+  <Type Name="clang::LValueReferenceType">
+    <DisplayString>{((clang::ReferenceType *)this)-&gt;PointeeType,view(cpp)} &amp;</DisplayString>
+    <Expand>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+      <Item Name="PointeeType">PointeeType</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::RValueReferenceType">
+    <DisplayString>{((clang::ReferenceType *)this)-&gt;PointeeType,view(cpp)} &amp;&amp;</DisplayString>
+    <Expand>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+      <Item Name="PointeeType">PointeeType</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::AttributedType">
+    <DisplayString>{ModifiedType} Attribute={(clang::AttributedType::Kind)AttributedTypeBits.AttrKind}</DisplayString>
+  </Type>
+
+  <!-- Unfortunately, Visual Studio has trouble seeing the PointerBitMask member PointerIntUnion, so I hardwire it to 2 bits-->
+  <Type Name="clang::DeclContext">
+    <DisplayString>{(clang::Decl::Kind)DeclContextBits.DeclKind,en}Decl</DisplayString>
+    <Expand>
+      <Item Name="DeclKind">(clang::Decl::Kind)DeclContextBits.DeclKind,en</Item>
+      <Synthetic Name="Members">
+        <DisplayString></DisplayString>
+        <Expand>
+          <LinkedListItems>
+            <HeadPointer>FirstDecl</HeadPointer>
+            <NextPointer>(clang::Decl *)(*(intptr_t *)NextInContextAndBits.Value.Data &amp; ~3)</NextPointer>
+            <ValueNode>*this</ValueNode>
+          </LinkedListItems>
+        </Expand>
+      </Synthetic>
+    </Expand>
+  </Type>
+  <Type Name="clang::FieldDecl">
+    <DisplayString>Field {{{*(clang::DeclaratorDecl *)this,view(cpp)nd}}}</DisplayString>
+  </Type>
+  <Type Name="clang::CXXMethodDecl">
+    <DisplayString IncludeView="cpp">{*(clang::FunctionDecl *)this,nd}</DisplayString>
+    <DisplayString>Method {{{*this,view(cpp)}}}</DisplayString>
+  </Type>
+  <Type Name="clang::CXXConstructorDecl">
+    <DisplayString>Constructor {{{Name,view(cpp)}({*(clang::FunctionDecl *)this,view(parm0)nd})}}</DisplayString>
+  </Type>
+  <Type Name="clang::CXXDestructorDecl">
+    <DisplayString>Destructor {{~{Name,view(cpp)}()}}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateTypeParmDecl">
+    <DisplayString IncludeView="TorC" Condition="Typename">typename</DisplayString>
+    <DisplayString IncludeView="TorC" Condition="!Typename">class</DisplayString>
+    <DisplayString IncludeView="MaybeEllipses" Condition="TypeForDecl == nullptr">(not yet known if parameter pack) </DisplayString>
+    <DisplayString IncludeView="MaybeEllipses" Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)TypeForDecl-&gt;CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType)->CanTTPTInfo.ParameterPack">...</DisplayString>
+    <DisplayString IncludeView="MaybeEllipses" Condition="!((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)TypeForDecl-&gt;CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType)->CanTTPTInfo.ParameterPack"></DisplayString>
+    <DisplayString IncludeView="DefaultArg" Condition="(*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data &amp; 3LL) == 0">{(TypeSourceInfo *)(*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data&amp;~3LL),view(cpp)}</DisplayString>
+    <DisplayString IncludeView="DefaultArg">{{InheritedInitializer}}</DisplayString>
+    <DisplayString IncludeView="Initializer" Condition="*(uintptr_t *)DefaultArgument.ValueOrInherited.Val.Value.Data &amp; 3LL">= {this,view(DefaultArg)na}</DisplayString>
+    <DisplayString IncludeView="Initializer"></DisplayString>
+    <DisplayString>{*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)} {this,view(Initializer)na}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateDecl">
+    <DisplayString IncludeView="cpp">{*TemplatedDecl,view(cpp)}</DisplayString>
+    <DisplayString>template{TemplateParams,na} {*TemplatedDecl};</DisplayString>
+    <Expand>
+      <Item Name="TemplateParams">TemplateParams,na</Item>
+      <Item Name="TemplatedDecl">TemplatedDecl,na</Item>
+    </Expand>
+  </Type>
+  <!-- Unfortunately, visualization of PointerIntPair<PointerUnion> doesn't work due to limitations in natvis, so we will barehad it-->
+  <Type Name="clang::TypedefNameDecl">
+    <DisplayString Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)==0" IncludeView="type">{(clang::TypeSourceInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL),view(cpp)na}</DisplayString>
+    <DisplayString Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)!=0" IncludeView="type">{(clang::TypedefNameDecl::ModedTInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL),view(cpp)na}</DisplayString>
+    <DisplayString IncludeView="name">{(TypeDecl *)this,view(cpp)nand}</DisplayString>
+    <DisplayString>typedef {this,view(type)na} {this,view(name)na};</DisplayString>
+    <Expand>
+      <Item Name="IsTransparent" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 1)==0">"Not yet calculated",sb</Item>
+      <Item Name="IsTransparent" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 1)!=0">(bool)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 2)</Item>
+      <Item Name="TypeSourceInfo" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)==0">(clang::TypeSourceInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL)</Item>
+      <Item Name="ModedTInfo" Condition="(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; 4)!=0">(clang::TypedefNameDecl::ModedTInfo *)(*(uintptr_t *)MaybeModedTInfo.Value.Data &amp; ~7LL)</Item>
+      <ExpandedItem>(TypeDecl *)this,nd</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TypeAliasDecl">
+    <DisplayString IncludeView="cpp">{(TypedefNameDecl *)this,view(name)nand}</DisplayString>
+    <DisplayString>using {(TypedefNameDecl *)this,view(name)nand} = {(TypedefNameDecl *)this,view(type)nand}</DisplayString>
+  </Type>
+  <Type Name="clang::AssumedTemplateStorage">
+    <DisplayString>{Name}</DisplayString>
+  </Type>
+  <Type Name="clang::UncommonTemplateNameStorage::BitsTag">
+    <DisplayString>Kind={(UncommonTemplateNameStorage::Kind)Kind,en}, Size={Size}</DisplayString>
+    <Expand>
+      <Item Name="Kind">(UncommonTemplateNameStorage::Kind)Kind</Item>
+      <Item Name="Size">Size</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::UncommonTemplateNameStorage">
+    <DisplayString IncludeView="cmn">{Bits},</DisplayString>
+    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::Overloaded">{this,view(cmn)na},{(OverloadedTemplateStorage*)this,na}</DisplayString>
+    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::Assumed">{this,view(cmn)na},{(AssumedTemplateStorage*)this,na}</DisplayString>
+    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParm">{this,view(cmn)na},{(SubstTemplateTemplateParmStorage*)this,na}</DisplayString>
+    <DisplayString Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParmPack">{this,view(cmn)na},{(SubstTemplateTemplateParmPackStorage*)this,na}</DisplayString>
+    <DisplayString>{this,view(cmn)na}</DisplayString>
+    <Expand>
+      <Item Name="Bits">Bits</Item>
+      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::Overloaded">(OverloadedTemplateStorage*)this</ExpandedItem>
+      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::Assumed">(AssumedTemplateStorage*)this</ExpandedItem>
+      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParm">(SubstTemplateTemplateParmStorage*)this</ExpandedItem>
+      <ExpandedItem Condition="Bits.Kind==UncommonTemplateNameStorage::SubstTemplateTemplateParmPack">(SubstTemplateTemplateParmPackStorage*)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <!-- clang::TemplateName::StorageType -->
+  <Type Name="llvm::PointerUnion&lt;clang::TemplateDecl *, clang::UncommonTemplateNameStorage *,
+                          clang::QualifiedTemplateName *, clang::DependentTemplateName *&gt;">
+    <!-- Expand this out by hand to get cpp view -->
+    <DisplayString Condition="(Val.Value &amp;3) == 0" IncludeView="cpp">
+      {(clang::TemplateDecl *)(Val.Value &amp; ~3LL),view(cpp)na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 0">
+      {(clang::TemplateDecl *)(Val.Value &amp; ~3LL),na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 1" IncludeView="cpp">
+      {(clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL),view(cpp)na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 1">
+      {(clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL),na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 2" IncludeView="cpp">
+      {(clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL),view(cpp)na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 2">
+      {(clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL),na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 3" IncludeView="cpp">
+      {(clang::DependentTemplateName *)(Val.Value &amp; ~3LL),view(cpp)na}
+    </DisplayString>
+    <DisplayString Condition="(Val.Value &amp;3) == 3">
+      {(clang::DependentTemplateName *)(Val.Value &amp; ~3LL),na}
+    </DisplayString>
+    <Expand>
+      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 0">"TemplateDecl",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 0">
+        (clang::TemplateDecl *)(Val.Value &amp; ~3LL)
+      </Item>
+      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 1">"UncommonTemplateNameStorage",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 1">
+        (clang::UncommonTemplateNameStorage *)(Val.Value &amp; ~3LL)
+      </Item>
+      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 2">"QualifiedTemplateName",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 2">
+        (clang::QualifiedTemplateName *)(Val.Value &amp; ~3LL)
+      </Item>
+      <Item Name="[Holds]" Condition="(Val.Value &amp;3) == 3">"DependentTemplateName",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="(Val.Value &amp;3) == 3">
+        (clang::DependentTemplateName *)(Val.Value &amp; ~3LL)
+      </Item>
+      <Item Name="[Val]">Val</Item>
+
+    </Expand>
+  </Type>
+  <Type Name="clang::TemplateName">
+    <DisplayString IncludeView="cpp">{Storage,view(cpp)na}</DisplayString>
+    <DisplayString>{Storage,na}</DisplayString>
+    <Expand>
+      <ExpandedItem>Storage</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::NamedDecl" >
+    <DisplayString IncludeView="cpp">{Name,view(cpp)}</DisplayString>
+    <DisplayString>{Name}</DisplayString>
+  </Type>
+  <Type Name="clang::TagDecl">
+    <DisplayString IncludeView="implicit" Condition="Implicit">implicit{" ",sb}</DisplayString>
+    <DisplayString IncludeView="implicit"></DisplayString>
+    <DisplayString IncludeView="modifiers">{*this,view(implicit)nd}</DisplayString>
+    <DisplayString IncludeView="cpp">{*this,view(modifiers)}{Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Struct">{*this,view(modifiers)nd}struct {Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Interface">{*this,view(modifiers)nd}interface {Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Union">{*this,view(modifiers)nd}union {Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Class">{*this,view(modifiers)nd}class {Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="TagDeclBits.TagDeclKind==clang::TagTypeKind::Enum">{*this,view(modifiers)nd}enum {Name,view(cpp)}</DisplayString>
+    <Expand>
+      <ExpandedItem>(clang::DeclContext *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TagType">
+    <DisplayString IncludeView="cpp">{decl,view(cpp)na}</DisplayString>
+    <DisplayString>{*decl}</DisplayString>
+    <Expand>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+      <Item Name="decl">decl</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::RecordType">
+    <DisplayString IncludeView="cpp">{(clang::TagType *)this,view(cpp)na}</DisplayString>
+    <DisplayString>{(clang::TagType *)this,na}</DisplayString>
+    <Expand>
+      <Item Name="TagType">*(clang::TagType *)this</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::SubstTemplateTypeParmType">
+    <DisplayString>{{{*Replaced,view(cpp)} &lt;= {CanonicalType,view(cpp)}}}</DisplayString>
+    <Expand>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+      <Item Name="Replaced">*Replaced</Item>
+    </Expand>
+  </Type>
+  <!-- We only show the first 5 parameter types in the display string (can't figure out how to loop in DisplayString)
+       but the expansion has all parameters -->
+  <Type Name="clang::FunctionProtoType">
+    <DisplayString IncludeView="left" Condition="FunctionTypeBits.HasTrailingReturn"></DisplayString>
+    <DisplayString IncludeView="left">{ResultType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="parm0" Condition="FunctionTypeBits.NumParams==0"></DisplayString>
+    <DisplayString IncludeView="parm0">{*(clang::QualType *)(this+1),view(cpp)}{*this,view(parm1)}</DisplayString>
+    <DisplayString IncludeView="parm1" Condition="FunctionTypeBits.NumParams==1"></DisplayString>
+    <DisplayString IncludeView="parm1">, {*((clang::QualType *)(this+1)+1),view(cpp)}{*this,view(parm2)}</DisplayString>
+    <DisplayString IncludeView="parm2" Condition="FunctionTypeBits.NumParams==2"></DisplayString>
+    <DisplayString IncludeView="parm2">, {*((clang::QualType *)(this+1)+2),view(cpp)}{*this,view(parm3)}</DisplayString>
+    <DisplayString IncludeView="parm3" Condition="FunctionTypeBits.NumParams==3"></DisplayString>
+    <DisplayString IncludeView="parm3">, {*((clang::QualType *)(this+1)+3),view(cpp)}{*this,view(parm4)}</DisplayString>
+    <DisplayString IncludeView="parm4" Condition="FunctionTypeBits.NumParams==4"></DisplayString>
+    <DisplayString IncludeView="parm4">, {*((clang::QualType *)(this+1)+4),view(cpp)}{*this,view(parm5)}</DisplayString>
+    <DisplayString IncludeView="parm5" Condition="FunctionTypeBits.NumParams==5"></DisplayString>
+    <DisplayString IncludeView="parm5">, /* expand for more params */</DisplayString>
+    <DisplayString IncludeView="right" Condition="FunctionTypeBits.HasTrailingReturn">({*this,view(parm0)}) -&gt; {ResultType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="right">({*this,view(parm0)})</DisplayString>
+    <DisplayString>{this,view(left)na}{this,view(right)na}</DisplayString>
+    <Expand>
+      <Item Name="ResultType">ResultType</Item>
+      <Synthetic Name="Parameter Types">
+        <DisplayString>{*this,view(parm0)}</DisplayString>
+        <Expand>
+          <ArrayItems>
+            <Size>FunctionTypeBits.NumParams</Size>
+            <ValuePointer>(clang::QualType *)(this+1)</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+
+  <Type Name="clang::AdjustedType">
+    <DisplayString>{OriginalTy} adjusted to {AdjustedTy}</DisplayString>
+    <Expand>
+      <Item Name="OriginalTy">OriginalTy</Item>
+      <Item Name="AdjustedTy">AdjustedTy</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::DecayedType">
+    <DisplayString IncludeView="left">{OriginalTy,view(left)}</DisplayString>
+    <DisplayString IncludeView="right">{OriginalTy,view(right)}</DisplayString>
+    <DisplayString>{OriginalTy}</DisplayString>
+    <Expand>
+      <ExpandedItem>(clang::AdjustedType *)this</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::ElaboratedType">
+    <DisplayString IncludeView="left">{NamedType,view(left)}</DisplayString>
+    <DisplayString IncludeView="right">{NamedType,view(right)}</DisplayString>
+    <DisplayString>{NamedType}</DisplayString>
+    <Expand>
+      <Item Name="[Keyword]">(clang::ElaboratedTypeKeyword)TypeWithKeywordBits.Keyword</Item>
+      <Item Name="[Nested Name Specifier]">NNS</Item>
+      <Item Name="[Underlying Type]">NamedType,view(cmn)</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::TemplateTypeParmType">
+    <DisplayString IncludeView="cpp" Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType) != this">{TTPDecl->Name,view(cpp)}</DisplayString>
+    <DisplayString Condition="((clang::TemplateTypeParmType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)CanonicalType.Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType) != this">Non-canonical: {*TTPDecl}</DisplayString>
+    <DisplayString>Canonical: {CanTTPTInfo}</DisplayString>
+    <Expand>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::InjectedClassNameType">
+    <DisplayString>{Decl,view(cpp)}</DisplayString>
+    <Expand>
+      <Item Name="Decl">Decl</Item>
+      <Item Name="InjectedType">InjectedType</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::DependentNameType">
+    <DisplayString>{NNS}{Name,view(cpp)na}</DisplayString>
+    <Expand>
+      <Item Name="NNS">NNS</Item>
+      <Item Name="Name">Name</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::NestedNameSpecifier">
+    <DisplayString Condition="!Specifier"></DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 0">{(IdentifierInfo*)Specifier,view(cpp)na}::</DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 1">{(NamedDecl*)Specifier,view(cpp)na}::</DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3) == 2">{(Type*)Specifier,view(cpp)na}::</DisplayString>
+    <Expand>
+      <Item Name="Kind">(NestedNameSpecifier::StoredSpecifierKind)((*(uintptr_t *)Prefix.Value.Data&gt;&gt;1)&amp;3)</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::PackExpansionType">
+    <DisplayString>{Pattern}</DisplayString>
+    <Expand>
+      <Item Name="Pattern">Pattern</Item>
+      <Item Name="NumExpansions">NumExpansions</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::QualType">
+    <DisplayString IncludeView="poly">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(poly)}{*this,view(fastQuals)}</DisplayString>
+    <DisplayString IncludeView="cpp">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(cpp)}{*this,view(fastQuals)}</DisplayString>
+    <DisplayString IncludeView="left">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(left)}{*this,view(fastQuals)}</DisplayString>
+    <DisplayString IncludeView="right">{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,view(right)}{*this,view(fastQuals)}</DisplayString>
+    <!-- For the Fast Qualifiers, it is simpler (and probably more efficient) just to list all 8 cases than create
+          views for each qualifier. TODO: Non-fast qualifiers -->
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==0"></DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==1">{" ",sb}const</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==2">{" ",sb}restrict</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==3">{" ",sb}const restrict</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==4">{" ",sb}volatile</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==5">{" ",sb}const volatile</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==6">{" ",sb}volatile restrict</DisplayString>
+    <DisplayString IncludeView="fastQuals" Condition="(((*(uintptr_t *)Value.Value.Data) &gt;&gt; 1) &amp; 7)==7">{" ",sb}const volatile restrict</DisplayString>
+    <DisplayString IncludeView="fastQuals">Cannot visualize non-fast qualifiers</DisplayString>
+    <DisplayString Condition="(*(uintptr_t *)Value.Value.Data) == 0">Null</DisplayString>
+    <DisplayString>{((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType,na}{*this,view(fastQuals)}</DisplayString>
+    <Expand>
+      <Item Name="Fast Quals">*this,view(fastQuals)</Item>
+      <ExpandedItem>((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)Value.Value.Data) &amp; ~(uintptr_t)((1U &lt;&lt; clang::TypeAlignmentInBits) - 1U)))-&gt;BaseType</ExpandedItem>
+    </Expand>
+
+  </Type>
+  <Type Name="clang::LocInfoType">
+    <DisplayString IncludeView="cpp">{DeclInfo,view(cpp)na}</DisplayString>
+    <DisplayString>{DeclInfo,na}</DisplayString>
+    <Expand>
+      <Item Name="DeclInfo">DeclInfo</Item>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TypeSourceInfo">
+    <DisplayString IncludeView="cpp">{Ty,view(cpp)}</DisplayString>
+    <DisplayString>{Ty}</DisplayString>
+    <Expand>
+      <ExpandedItem>Ty</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TypeLoc">
+    <DisplayString>{(QualType *)&amp;Ty,na}</DisplayString>
+    <Expand>
+      <Item Name="Ty">(QualType *)&amp;Ty</Item>
+      <Item Name="Data">Data</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::TypeLocBuilder">
+    <DisplayString Optional="true" Condition="LastTy.Value.Value==0">Not building anything</DisplayString>
+    <DisplayString Optional="true">Building a {LastTy}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateArgumentLoc">
+    <DisplayString IncludeView="cpp">{Argument,view(cpp)}</DisplayString>
+    <DisplayString>{Argument}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateArgument">
+    <DisplayString IncludeView="cpp" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{*(clang::QualType *)&amp;TypeOrValue.V,view(cpp)}</DisplayString>
+    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en} template argument: {*(clang::QualType *)&amp;TypeOrValue.V}</DisplayString>
+    <DisplayString IncludeView="arg0" Condition="Args.NumArgs==0"></DisplayString>
+    <DisplayString IncludeView="arg0">{Args.Args[0]}{*this,view(arg1)}</DisplayString>
+    <DisplayString IncludeView="arg1" Condition="Args.NumArgs==1"></DisplayString>
+    <DisplayString IncludeView="arg1">, {Args.Args[1]}{*this,view(arg2)}</DisplayString>
+    <DisplayString IncludeView="arg2" Condition="Args.NumArgs==2"></DisplayString>
+    <DisplayString IncludeView="arg2">, {Args.Args[2]}, ...</DisplayString>
+    <DisplayString IncludeView="arg0cpp" Condition="Args.NumArgs==0"></DisplayString>
+    <DisplayString IncludeView="arg0cpp">{Args.Args[0],view(cpp)}{*this,view(arg1cpp)}</DisplayString>
+    <DisplayString IncludeView="arg1cpp" Condition="Args.NumArgs==1"></DisplayString>
+    <DisplayString IncludeView="arg1cpp">, {Args.Args[1],view(cpp)}{*this,view(arg2cpp)}</DisplayString>
+    <DisplayString IncludeView="arg2cpp" Condition="Args.NumArgs==2"></DisplayString>
+    <DisplayString IncludeView="arg2cpp">, {Args.Args[2],view(cpp)}, ...</DisplayString>
+    <DisplayString IncludeView="cpp" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">{*this,view(arg0cpp)}</DisplayString>
+    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">{*this,view(arg0)}</DisplayString>
+    <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Expression">{(clang::Expr *)TypeOrValue.V,view(cpp)na}</DisplayString>
+    <DisplayString>{(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en}</DisplayString>
+    <Expand>
+      <Item Name="QualType" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">*(clang::QualType *)&amp;TypeOrValue.V</Item>
+      <Item Name="Expression" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Expression">(clang::Expr *)TypeOrValue.V</Item>
+      <ArrayItems Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Pack">
+        <Size>Args.NumArgs</Size>
+        <ValuePointer>Args.Args</ValuePointer>
+      </ArrayItems>
+      <!-- TODO: Other kinds-->
+    </Expand>
+  </Type>
+  <Type Name="clang::TemplateArgumentListInfo">
+    <DisplayString IncludeView ="elt0" Condition="Arguments.Size == 0"></DisplayString>
+    <DisplayString IncludeView ="elt0">{((TemplateArgumentLoc*)Arguments.BeginX)[0],view(cpp)}{*this,view(elt1)}</DisplayString>
+    <DisplayString IncludeView ="elt1" Condition="Arguments.Size == 1"></DisplayString>
+    <DisplayString IncludeView ="elt1">, {((TemplateArgumentLoc*)Arguments.BeginX)[1],view(cpp)}{*this,view(elt2)}</DisplayString>
+    <DisplayString IncludeView ="elt2" Condition="Arguments.Size == 2"></DisplayString>
+    <DisplayString IncludeView ="elt2">, {((TemplateArgumentLoc*)Arguments.BeginX)[2],view(cpp)}{*this,view(elt3)}</DisplayString>
+    <DisplayString IncludeView ="elt3" Condition="Arguments.Size == 3"></DisplayString>
+    <DisplayString IncludeView ="elt3">, {((TemplateArgumentLoc*)Arguments.BeginX)[3],view(cpp)}{*this,view(elt4)}</DisplayString>
+    <DisplayString IncludeView ="elt4" Condition="Arguments.Size == 4"></DisplayString>
+    <DisplayString IncludeView ="elt4">, ...</DisplayString>
+    <DisplayString Condition="Arguments.Size == 0">empty</DisplayString>
+    <DisplayString Condition="Arguments.Size != 0">&lt;{*this,view(elt0)}&gt;</DisplayString>
+    <DisplayString>Uninitialized</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateArgumentList">
+    <DisplayString IncludeView="arg0" Condition="NumArguments==0"></DisplayString>
+    <DisplayString IncludeView="arg0">{Arguments[0],view(cpp)}{*this,view(arg1)}</DisplayString>
+    <DisplayString IncludeView="arg1" Condition="NumArguments==1"></DisplayString>
+    <DisplayString IncludeView="arg1">, {Arguments[1],view(cpp)}{*this,view(arg2)}</DisplayString>
+    <DisplayString IncludeView="arg2" Condition="NumArguments==2"></DisplayString>
+    <DisplayString IncludeView="arg2">, {Arguments[1],view(cpp)}, ...</DisplayString>
+    <DisplayString>&lt;{*this,view(arg0)}&gt;</DisplayString>
+    <Expand>
+      <Item Name="NumArguments">NumArguments</Item>
+      <ArrayItems>
+        <Size>NumArguments</Size>
+        <ValuePointer>Arguments</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+  <Type Name="llvm::ArrayRef&lt;clang::TemplateArgument&gt;">
+    <DisplayString IncludeView="arg0" Condition="Length==0"></DisplayString>
+    <DisplayString IncludeView="arg0">{Data[0],view(cpp)}{*this,view(arg1)}</DisplayString>
+    <DisplayString IncludeView="arg1" Condition="Length==1"></DisplayString>
+    <DisplayString IncludeView="arg1">, {Data[1],view(cpp)}{*this,view(arg2)}</DisplayString>
+    <DisplayString IncludeView="arg2" Condition="Length==2"></DisplayString>
+    <DisplayString IncludeView="arg2">, {Data[2],view(cpp)}, ...</DisplayString>
+    <DisplayString>&lt;{*this,view(arg0)}&gt;</DisplayString>
+    <Expand>
+      <Item Name="Length">Length</Item>
+      <Synthetic Name="Data">
+        <Expand>
+          <ArrayItems>
+            <Size>Length</Size>
+            <ValuePointer>Data</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+    </Expand>
+  </Type>
+  <Type Name="clang::MultiLevelTemplateArgumentList">
+    <DisplayString IncludeView="level0" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==0"></DisplayString>
+    <DisplayString IncludeView="level0">{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[0],view(cpp)}{*this,view(level1)}</DisplayString>
+    <DisplayString IncludeView="level1" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==1"></DisplayString>
+    <DisplayString IncludeView="level1">::{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[1],view(cpp)}{*this,view(level2)}</DisplayString>
+    <DisplayString IncludeView="level2" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==2"></DisplayString>
+    <DisplayString IncludeView="level2">::{((llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX)[2],view(cpp)}, ...</DisplayString>
+    <DisplayString>{*this,view(level0)}</DisplayString>
+    <Expand>
+      <Item Name="TemplateList">TemplateArgumentLists</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::ParsedTemplateArgument">
+    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Type" IncludeView="cpp">{(clang::QualType *)Arg,view(cpp)na}</DisplayString>
+    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Type">Type template argument: {*(clang::QualType *)Arg}</DisplayString>
+    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::NonType">Non-type template argument: {*(clang::Expr *)Arg}</DisplayString>
+    <DisplayString Condition="Kind==clang::ParsedTemplateArgument::Template">Template template argument: {*(clang::TemplateName *)Arg</DisplayString>
+    <Expand>
+      <Item Name="Kind">Kind,en</Item>
+      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::Type">(clang::QualType *)Arg</Item>
+      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::NonType">(clang::Expr *)Arg</Item>
+      <Item Name="Arg" Condition="Kind==clang::ParsedTemplateArgument::Template">(clang::TemplateName *)Arg</Item>
+    </Expand>
+  </Type>
+  <!-- Builtin types that have C++ keywords are manually displayed as that keyword. Otherwise, just use the enum name -->
+  <Type Name="clang::BuiltinType">
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Void">void</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Bool">bool</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char_U">char</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UChar">unsigned char</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::WChar_U">wchar_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char16">char16_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char32">char32_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UShort">unsigned short</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UInt">unsigned int</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::ULong">unsigned long</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::ULongLong">unsigned long long</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::UInt128">__uint128_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Char_S">char</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::SChar">signed char</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::WChar_S">wchar_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Short">short</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Int">int</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Long">long</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::LongLong">long long</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Int128">__int128_t</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Half">__fp16</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Float">float</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::Double">double</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::LongDouble">long double</DisplayString>
+    <DisplayString Condition="BuiltinTypeBits.Kind==clang::BuiltinType::NullPtr">nullptr_t</DisplayString>
+    <DisplayString>{(clang::BuiltinType::Kind)BuiltinTypeBits.Kind, en}</DisplayString>
+    <Expand>
+      <Item Name="Kind">(clang::BuiltinType::Kind)BuiltinTypeBits.Kind</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="clang::TemplateSpecializationType">
+    <DisplayString IncludeView="arg0" Condition="TemplateSpecializationTypeBits.NumArgs==0"></DisplayString>
+    <DisplayString IncludeView="arg0">{((clang::TemplateArgument *)(this+1))[0],view(cpp)}{*this,view(arg1)}</DisplayString>
+    <DisplayString IncludeView="arg1" Condition="TemplateSpecializationTypeBits.NumArgs==1"></DisplayString>
+    <DisplayString IncludeView="arg1">, {((clang::TemplateArgument *)(this+1))[1],view(cpp)}{*this,view(arg2)}</DisplayString>
+    <DisplayString IncludeView="arg2" Condition="TemplateSpecializationTypeBits.NumArgs==2"></DisplayString>
+    <DisplayString IncludeView="arg2">, {((clang::TemplateArgument *)(this+1))[2],view(cpp)}{*this,view(arg3)}</DisplayString>
+    <DisplayString Condition="(Template.Storage.Val.Value &amp; 3) == 0">
+      {*((clang::TemplateDecl *)(Template.Storage.Val.Value))->TemplatedDecl,view(cpp)}&lt;{*this,view(arg0)}&gt;
+    </DisplayString>
+    <DisplayString>Can't visualize this TemplateSpecializationType</DisplayString>
+    <Expand>
+      <Item Name="Template">Template.Storage</Item>
+      <ArrayItems>
+        <Size>TemplateSpecializationTypeBits.NumArgs</Size>
+        <ValuePointer>(clang::TemplateArgument *)(this+1)</ValuePointer>
+      </ArrayItems>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeducedType">
+    <Expand>
+      <Item Name="isDeduced">(CanonicalType.Value.Value != this) || TypeBits.Dependent</Item>
+      <ExpandedItem>*(clang::Type *)this,view(cmn)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeducedTemplateSpecializationType">
+    <DisplayString Condition="(CanonicalType.Value.Value != this) || TypeBits.Dependent">{CanonicalType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="cpp">{Template,view(cpp)}</DisplayString>
+    <DisplayString>{Template}</DisplayString>
+    <Expand>
+      <Item Name="Template">Template</Item>
+      <Item Name="Deduced As" Condition="(CanonicalType.Value.Value != this) || TypeBits.Dependent">CanonicalType,view(cpp)</Item>
+      <ExpandedItem>(clang::DeducedType *)this</ExpandedItem>
+      <Item Name="Template">Template</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::ClassTemplateSpecializationDecl">
+    <DisplayString>{*(CXXRecordDecl *)this,nd}{*TemplateArgs}</DisplayString>
+    <Expand>
+      <ExpandedItem>(CXXRecordDecl *)this,nd</ExpandedItem>
+      <Item Name="TemplateArgs">TemplateArgs</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::IdentifierInfo">
+    <DisplayString Condition="Entry != 0">{((llvm::StringMapEntry&lt;clang::IdentifierInfo *&gt;*)Entry)+1,sb}</DisplayString>
+    <Expand>
+      <Item Condition="Entry != 0" Name="[Identifier]">((llvm::StringMapEntry&lt;clang::IdentifierInfo *&gt;*)Entry)+1,s</Item>
+      <Item Name="Token Kind">(clang::tok::TokenKind)TokenID</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeclarationName">
+    <DisplayString Condition="Ptr == 0" IncludeView="cpp"></DisplayString>
+    <DisplayString Condition="Ptr == 0">Empty</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredIdentifier" IncludeView="cpp">{*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredIdentifier">{{Identifier ({*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredObjCZeroArgSelector">{{ObjC Zero Arg Selector (*{(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredObjCOneArgSelector">{{ObjC One Arg Selector (*{(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName" IncludeView="cpp">{(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)na}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName">C++ Constructor {{{(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)na}}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXDestructorName">C++ Destructor {{*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask)}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXConversionFunctionName">C++ Conversion function {{*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask)}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredCXXOperatorName">C++ Operator {{*(clang::detail::CXXOperatorIdName *)(Ptr &amp; ~PtrMask)}}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra"
+                   IncludeView="cpp">{*(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask),view(cpp)}</DisplayString>
+    <DisplayString Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra">{{Extra ({*(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask)})}}</DisplayString>
+    <Expand>
+      <Item Name="Kind">StoredNameKind(Ptr &amp; PtrMask),en</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredIdentifier" Name="[Identifier]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredObjCZeroArgSelector" Name="[ObjC Zero Arg Selector]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredObjCOneArgSelector" Name="[ObjC One Arg Selector]">*(clang::IdentifierInfo *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXConstructorName" Name="[C++ Constructor]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXDestructorName" Name="[C++ Destructor]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXConversionFunctionName" Name="[C++ Conversion function]">*(clang::detail::CXXSpecialNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredCXXOperatorName" Name="[C++ Operator]">*(clang::detail::CXXOperatorIdName *)(Ptr &amp; ~PtrMask),na</Item>
+      <Item Condition="(Ptr &amp; PtrMask) == StoredDeclarationNameExtra" Name="[Extra]">(clang::detail::DeclarationNameExtra *)(Ptr &amp; ~PtrMask),na</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::detail::DeclarationNameExtra">
+    <DisplayString Condition="ExtraKindOrNumArgs == CXXDeductionGuideName" IncludeView="cpp">
+      {(CXXDeductionGuideNameExtra *)this,view(cpp)nand}
+    </DisplayString>
+    <DisplayString Condition="ExtraKindOrNumArgs == CXXDeductionGuideName">
+      {(CXXDeductionGuideNameExtra *)this,nand}
+    </DisplayString>
+    <DisplayString Condition="ExtraKindOrNumArgs == CXXLiteralOperatorName">C++ Literal operator</DisplayString>
+    <DisplayString Condition="ExtraKindOrNumArgs == CXXUsingDirective">C++ Using directive</DisplayString>
+    <DisplayString Condition="ExtraKindOrNumArgs == ObjCMultiArgSelector">Objective-C MultiArg selector</DisplayString>
+    <DisplayString>{(clang::detail::DeclarationNameExtra::ExtraKind)ExtraKindOrNumArgs,en}{"  ",sb}{*this,view(cpp)}</DisplayString>
+    <Expand>
+      <ExpandedItem Condition="ExtraKindOrNumArgs == CXXDeductionGuideName">(CXXDeductionGuideNameExtra *)this</ExpandedItem>
+      <Item Name="ExtraKindOrNumArgs" Condition="ExtraKindOrNumArgs != CXXDeductionGuideName">ExtraKindOrNumArgs</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::detail::CXXDeductionGuideNameExtra">
+    <DisplayString IncludeView="cpp">{Template->TemplatedDecl,view(cpp)}</DisplayString>
+    <DisplayString>C++ Deduction guide for {Template->TemplatedDecl,view(cpp)na}</DisplayString>
+  </Type>
+  <Type Name="clang::detail::CXXSpecialNameExtra">
+    <DisplayString IncludeView="cpp">{Type,view(cpp)}</DisplayString>
+    <DisplayString>{Type}</DisplayString>
+  </Type>
+  <Type Name="clang::DeclarationNameInfo">
+    <DisplayString>{Name}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateIdAnnotation">
+    <DisplayString IncludeView="arg0" Condition="NumArgs==0"></DisplayString>
+    <DisplayString IncludeView="arg0">{(ParsedTemplateArgument *)(this+1),view(cpp)na}{this,view(arg1)na}</DisplayString>
+    <DisplayString IncludeView="arg1" Condition="NumArgs==1"></DisplayString>
+    <DisplayString IncludeView="arg1">, {((ParsedTemplateArgument *)(this+1))+1,view(cpp)na}{this,view(arg2)na}</DisplayString>
+    <DisplayString IncludeView="arg2" Condition="NumArgs==2"></DisplayString>
+    <DisplayString IncludeView="arg1">, ...</DisplayString>
+    <DisplayString>{Name,na}&lt;{this,view(arg0)na}&gt;</DisplayString>
+    <Expand>
+      <Item Name="Name">Name</Item>
+      <Synthetic Name="Arguments">
+        <DisplayString>{this,view(arg0)na}</DisplayString>
+        <Expand>
+          <ArrayItems>
+            <Size>NumArgs</Size>
+            <ValuePointer>(ParsedTemplateArgument *)(this+1)</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+      <Item Name="Operator">Operator</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::Token">
+    <DisplayString Condition="Kind == clang::tok::annot_template_id">{{annot_template_id ({(clang::TemplateIdAnnotation *)(PtrData),na})}}</DisplayString>
+    <DisplayString Condition="Kind == clang::tok::identifier">{{Identifier ({(clang::IdentifierInfo *)(PtrData),na})}}</DisplayString>
+    <DisplayString>{(clang::tok::TokenKind)Kind,en}</DisplayString>
+  </Type>
+  <Type Name="clang::Lexer">
+    <DisplayString>{BufferPtr,nasb}</DisplayString>
+  </Type>
+  <Type Name="clang::Preprocessor::IncludeStackInfo">
+    <DisplayString Condition="TheLexer._Mypair._Myval2 != 0">{TheLexer._Mypair._Myval2,na}</DisplayString>
+    <DisplayString Condition="TheTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {TheTokenLexer._Mypair._Myval2,na}</DisplayString>
+    <DisplayString></DisplayString>
+  </Type>
+  <Type Name="clang::Preprocessor">
+    <DisplayString IncludeView="cached" Condition="CachedLexPos &lt; CachedTokens.Size">
+      [{(Token *)(CachedTokens.BeginX) + CachedLexPos,na}] {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}
+    </DisplayString>
+    <DisplayString IncludeView="cached"> {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}</DisplayString>
+    <DisplayString Condition="CurLexer._Mypair._Myval2 != 0">{CurLexer._Mypair._Myval2,na}</DisplayString>
+    <DisplayString Condition="CurTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {CurTokenLexer._Mypair._Myval2,na}</DisplayString>
+    <!-- Can't use CurLexerCallback because natvis sees the type rather than the variable -->
+    <DisplayString Condition="IncludeMacroStack._Mypair._Myval2._Mylast - IncludeMacroStack._Mypair._Myval2._Myfirst">
+      {this,view(cached)}
+    </DisplayString>
+    <DisplayString>CLK_LexAfterModuleImport</DisplayString>
+  </Type>
+  <Type Name="clang::Parser">
+    <DisplayString>[{Tok}] {PP,na}</DisplayString>
+  </Type>
+  <Type Name="clang::LambdaIntroducer::LambdaCapture">
+    <DisplayString Condition="Kind == LCK_This">this</DisplayString>
+    <DisplayString Condition="Kind == LCK_StarThis">*this</DisplayString>
+    <DisplayString Condition="Kind == LCK_ByCopy">{Id}</DisplayString>
+    <DisplayString Condition="Kind == LCK_ByRef">&amp;{Id}</DisplayString>
+    <DisplayString>No visualizer for {Kind}</DisplayString>
+  </Type>
+  <Type Name="clang::LambdaIntroducer">
+    <DisplayString IncludeView="default" Condition="Default==LCD_None"></DisplayString>
+    <DisplayString IncludeView="default" Condition="Default==LCD_ByCopy">=,</DisplayString>
+    <DisplayString IncludeView="default" Condition="Default==LCD_ByRef">&amp;,</DisplayString>
+    <DisplayString IncludeView="capture0" Condition="Captures.Size==0"></DisplayString>
+    <DisplayString IncludeView="capture0">{(LambdaCapture *)(Captures.BeginX),na}{this,view(capture1)na}</DisplayString>
+    <DisplayString IncludeView="capture1" Condition="Captures.Size==1"></DisplayString>
+    <DisplayString IncludeView="capture1">,{(LambdaCapture *)(Captures.BeginX)+1,na}{this,view(capture2)na}</DisplayString>
+    <DisplayString IncludeView="capture2" Condition="Captures.Size==2"></DisplayString>
+    <DisplayString IncludeView="capture2">,{(LambdaCapture *)(Captures.BeginX)+2,na}{this,view(capture3)na}</DisplayString>
+    <DisplayString IncludeView="capture3" Condition="Captures.Size==3"></DisplayString>
+    <DisplayString IncludeView="capture3">,...</DisplayString>
+    <DisplayString>[{this,view(default)na}{this,view(capture0)na}]</DisplayString>
+  </Type>
+  <Type Name="clang::DeclSpec">
+    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_typename || TypeSpecType == TST_typeofType || TypeSpecType == TST_underlying_type || TypeSpecType == TST_atomic">
+      , [{TypeRep}]
+    </DisplayString>
+    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_typeofExpr || TypeSpecType == TST_decltype">
+      , [{ExprRep}]
+    </DisplayString>
+    <DisplayString IncludeView="extra" Condition="TypeSpecType == TST_enum || TypeSpecType == TST_struct || TypeSpecType == TST_interface || TypeSpecType == TST_union || TypeSpecType == TST_class">
+      , [{DeclRep}]
+    </DisplayString>
+    <DisplayString IncludeView="extra"></DisplayString>
+    <DisplayString>[{(clang::DeclSpec::SCS)StorageClassSpec,en}], [{(clang::TypeSpecifierType)TypeSpecType,en}]{this,view(extra)na}</DisplayString>
+    <Expand>
+      <Item Name="StorageClassSpec">(clang::DeclSpec::SCS)StorageClassSpec</Item>
+      <Item Name="TypeSpecType">(clang::TypeSpecifierType)TypeSpecType</Item>
+      <Item Name="TypeRep" Condition="TypeSpecType == TST_typename || TypeSpecType == TST_typeofType || TypeSpecType == TST_underlying_type || TypeSpecType == TST_atomic">
+        TypeRep
+      </Item>
+      <Item Name="ExprRep" Condition="TypeSpecType == TST_typeofExpr || TypeSpecType == TST_decltype">
+        ExprRep
+      </Item>
+      <Item Name="DeclRep" Condition="TypeSpecType == TST_enum || TypeSpecType == TST_struct || TypeSpecType == TST_interface || TypeSpecType == TST_union || TypeSpecType == TST_class">
+        DeclRep
+      </Item>
+
+    </Expand>
+  </Type>
+  <Type Name="clang::PragmaHandler">
+    <DisplayString>{Name,s}</DisplayString>
+  </Type>
+  <Type Name="clang::FileEntry">
+    <DisplayString>{RealPathName,s}</DisplayString>
+  </Type>
+  <Type Name="clang::DirectoryEntry">
+    <DisplayString>{Name,s}</DisplayString>
+  </Type>
+  <Type Name="clang::VarDecl::VarDeclBitfields">
+    <Expand>
+      <Item Name="StorageClass">(clang::StorageClass)SClass</Item>
+      <Item Name="ThreadStorageClass">(clang::ThreadStorageClassSpecifier)TSCSpec</Item>
+      <Item Name="InitStyle">(clang::VarDecl::InitializationStyle)InitStyle</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeclaratorDecl">
+    <DisplayString>{DeclType,view(left)} {Name,view(cpp)}{DeclType,view(right)}</DisplayString>
+    <Expand>
+      <Item Name="Name">Name</Item>
+      <Item Name="DeclType">DeclType</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::VarDecl">
+    <DisplayString>{(DeclaratorDecl*)this,nand}</DisplayString>
+    <Expand>
+      <ExpandedItem>(DeclaratorDecl*)this,nd</ExpandedItem>
+      <Item Name="Init">Init</Item>
+      <Item Name="VarDeclBits">VarDeclBits</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::ParmVarDecl">
+    <DisplayString>{*(VarDecl*)this,nd}</DisplayString>
+    <Expand>
+      <Item Name="ParmVarDeclBits">ParmVarDeclBits</Item>
+      <ExpandedItem>*(VarDecl*)this,nd</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::ExplicitSpecifier">
+    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::ResolvedTrue" IncludeView="cpp">{"explicit ",sb}</DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::ResolvedFalse" IncludeView="cpp"></DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data&gt;&gt;1)&amp;3) == ExplicitSpecKind::Unresolved" IncludeView="cpp">explicit({ExplicitSpec,view(ptr)na})</DisplayString>
+    <DisplayString Condition="((*(uintptr_t *)ExplicitSpec.Value.Data)&amp;~7) == 0">{ExplicitSpec,view(int)en}</DisplayString>
+    <DisplayString>{ExplicitSpec,view(int)en} : {ExplicitSpec,view(ptr)na}</DisplayString>
+  </Type>
+  <Type Name="clang::CXXDeductionGuideDecl">
+    <DisplayString>{ExplicitSpec,view(cpp)}{Name,view(cpp)nd}({(FunctionDecl*)this,view(parm0)nand}) -&gt; {((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)(((uintptr_t)DeclType.Value.Value) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}</DisplayString>
+    <Expand>
+      <Item Name="ExplicitSpec">ExplicitSpec</Item>
+      <Item Name="IsCopyDeductionCandidate">(bool)FunctionDeclBits.IsCopyDeductionCandidate</Item>
+      <ExpandedItem>(FunctionDecl*)this,nd</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::FunctionDecl">
+    <DisplayString IncludeView="retType">{((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}</DisplayString>
+    <DisplayString IncludeView="parm0" Condition="0 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm0">{ParamInfo[0],na}{*this,view(parm1)nd}</DisplayString>
+    <DisplayString IncludeView="parm1" Condition="1 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm1">, {ParamInfo[1],na}{*this,view(parm2)nd}</DisplayString>
+    <DisplayString IncludeView="parm2" Condition="2 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm2">, {ParamInfo[2],na}{*this,view(parm3)nd}</DisplayString>
+    <DisplayString IncludeView="parm3" Condition="3 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm3">, {ParamInfo[3],na}{*this,view(parm4)nd}</DisplayString>
+    <DisplayString IncludeView="parm4" Condition="4 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm4">, {ParamInfo[4],na}{*this,view(parm5)nd}</DisplayString>
+    <DisplayString IncludeView="parm5" Condition="5 == ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams"></DisplayString>
+    <DisplayString IncludeView="parm5">, /* expand for more params */</DisplayString>
+    <DisplayString Condition="((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.HasTrailingReturn">
+      auto {Name,view(cpp)nd}({*this,view(parm0)nd}) -&gt; {((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType,view(cpp)}
+    </DisplayString>
+    <DisplayString>{this,view(retType)nand} {Name,view(cpp)nd}({*this,view(parm0)nd})</DisplayString>
+    <Expand>
+      <ExpandedItem>(clang::DeclaratorDecl *)this,nd</ExpandedItem>
+      <Item Name="ReturnType">((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)->ResultType</Item>
+      <Synthetic Name="Parameter Types">
+        <DisplayString>{*this,view(parm0)nd}</DisplayString>
+        <Expand>
+          <ArrayItems>
+            <Size>((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)((*(uintptr_t *)DeclType.Value.Value.Data) &amp; ~15))-&gt;BaseType)-&gt;FunctionTypeBits.NumParams</Size>
+            <ValuePointer>ParamInfo</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+      <Item Name="TemplateOrSpecialization">TemplateOrSpecialization</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::OpaquePtr&lt;*&gt;">
+    <DisplayString>{*($T1*)&amp;Ptr}</DisplayString>
+    <Expand>
+      <ExpandedItem>($T1*)&amp;Ptr</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::UnionOpaquePtr&lt;*&gt;">
+    <DisplayString>{($T1 *)Ptr}</DisplayString>
+    <Expand>
+      <ExpandedItem>($T1 *)Ptr</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::TemplateParameterList">
+    <DisplayString IncludeView="parm0" Condition="NumParams==0"></DisplayString>
+    <DisplayString IncludeView="parm0">{*((NamedDecl **)(this+1))[0],view(cpp)}{*this,view(parm1)}</DisplayString>
+    <DisplayString IncludeView="parm1" Condition="NumParams==1"></DisplayString>
+    <DisplayString IncludeView="parm1">, {*((NamedDecl **)(this+1))[1],view(cpp)}{*this,view(parm2)}</DisplayString>
+    <DisplayString IncludeView="parm2" Condition="NumParams==2"></DisplayString>
+    <DisplayString IncludeView="parm2">, {*((NamedDecl **)(this+1))[2],view(cpp)}{*this,view(parm3)}</DisplayString>
+    <DisplayString IncludeView="parm3" Condition="NumParams==3"></DisplayString>
+    <DisplayString IncludeView="parm3">, {*((NamedDecl **)(this+1))[3],view(cpp)}{*this,view(parm4)}</DisplayString>
+    <DisplayString IncludeView="parm4" Condition="NumParams==4"></DisplayString>
+    <DisplayString IncludeView="parm4">, {*((NamedDecl **)(this+1))[4],view(cpp)}{*this,view(parm5)}</DisplayString>
+    <DisplayString IncludeView="parm5" Condition="NumParams==5"></DisplayString>
+    <DisplayString IncludeView="parm5">, /* Expand for more params */</DisplayString>
+    <DisplayString>&lt;{*this,view(parm0)}&gt;</DisplayString>
+    <Expand>
+      <ArrayItems>
+        <Size>NumParams</Size>
+      <ValuePointer>(NamedDecl **)(this+1)</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+  <Type Name="clang::Stmt">
+    <DisplayString>{(clang::Stmt::StmtClass)StmtBits.sClass,en}</DisplayString>
+    <Expand>
+      <Item Name="Class">(clang::Stmt::StmtClass)StmtBits.sClass,en</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::Expr">
+    <DisplayString Condition="StmtBits.sClass==clang::Stmt::StmtClass::StringLiteralClass" IncludeView="poly">{*(clang::StringLiteral *)this}</DisplayString>
+    <DisplayString>Expression of class {(clang::Stmt::StmtClass)StmtBits.sClass,en} and type {TR,view(cpp)}</DisplayString>
+  </Type>
+  <Type Name="clang::StringLiteral">
+    <Expand>
+      <Item Name="Length">*(unsigned *)(((clang::StringLiteral *)this)+1)</Item>
+      <Item Name="Data" Condition="StringLiteralBits.NumConcatenated==1">(const char *)(((clang::StringLiteral *)this)+1)+4+4,[*(unsigned *)(((clang::StringLiteral *)this)+1)]s8</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeclAccessPair">
+    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_public">public</DisplayString>
+    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_protected">protected</DisplayString>
+    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_private">private</DisplayString>
+    <DisplayString IncludeView="access" Condition="(Ptr&amp;Mask) == clang::AS_none"></DisplayString>
+    <DisplayString IncludeView="decl">{*(clang::NamedDecl *)(Ptr&amp;~Mask)}</DisplayString>
+    <DisplayString>{*this,view(access)} {*this,view(decl)}</DisplayString>
+    <Expand>
+      <Item Name="access">(clang::AccessSpecifier)(Ptr&amp;Mask),en</Item>
+      <Item Name="decl">*(clang::NamedDecl *)(Ptr&amp;~Mask)</Item>
+    </Expand>
+  </Type>
+  <Type Name="clang::UnqualifiedId">
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_Identifier">[IK_Identifier] {*Identifier}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_OperatorFunctionId">[IK_OperatorFunctionId] {OperatorFunctionId}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConversionFunctionId">[IK_ConversionFunctionId] {ConversionFunctionId}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConstructorName">[IK_ConstructorName] {ConstructorName}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_DestructorName">[IK_DestructorName] {DestructorName}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_DeductionGuideName">[IK_DeductionGuideName] {TemplateName}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_TemplateId">[IK_TemplateId] {TemplateId}</DisplayString>
+    <DisplayString Condition="Kind==UnqualifiedIdKind::IK_ConstructorTemplateId">[IK_ConstructorTemplateId] {TemplateId}</DisplayString>
+    <DisplayString>Kind</DisplayString>
+    <Expand>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_Identifier">Identifier</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_OperatorFunctionId">OperatorFunctionId</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConversionFunctionId">ConversionFunctionId</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConstructorName">ConstructorName</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_DestructorName">DestructorName</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_DeductionGuideName">TemplateName</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_TemplateId">TemplateId</ExpandedItem>
+      <ExpandedItem Condition="Kind==UnqualifiedIdKind::IK_ConstructorTemplateId">TemplateId</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeclGroup">
+    <DisplayString>NumDecls={NumDecls}</DisplayString>
+    <Expand>
+      <ArrayItems>
+        <Size>NumDecls</Size>
+        <ValuePointer>(Decl **)(this+1)</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+  <Type Name="clang::DeclGroupRef">
+    <DisplayString Condition="(Kind)((uintptr_t)D&amp;1)==SingleDeclKind">{*D}</DisplayString>
+    <DisplayString>{*(DeclGroup *)((uintptr_t)D&amp;~1)}</DisplayString>
+    <Expand>
+      <ExpandedItem Condition="(Kind)((uintptr_t)D&amp;1)==SingleDeclKind">D</ExpandedItem>
+      <ExpandedItem Condition="(Kind)((uintptr_t)D&amp;1)==DeclGroupKind">(DeclGroup *)((uintptr_t)D&amp;~1)</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::Declarator">
+    <DisplayString>{DS} {Name}</DisplayString>
+  </Type>
+  <Type Name="clang::UnresolvedSet&lt;*&gt;">
+    <DisplayString>{Decls}</DisplayString>
+    <Expand>
+      <ExpandedItem>Decls</ExpandedItem>
+    </Expand>
+  </Type>
+  <Type Name="clang::LookupResult">
+    <DisplayString Condition="ResultKind == clang::LookupResult::Ambiguous">{Ambiguity,en}: {Decls}</DisplayString>
+    <DisplayString>{ResultKind,en}: {Decls}</DisplayString>
+  </Type>
+  <Type Name="clang::ActionResult&lt;*, 0&gt;">
+    <DisplayString Condition="Invalid">Invalid</DisplayString>
+    <DisplayString Condition="!*(void **)&amp;Val">Unset</DisplayString>
+    <DisplayString>{Val}</DisplayString>
+  </Type>
+  <Type Name="clang::ActionResult&lt;*, 1&gt;">
+    <DisplayString Condition="Value&amp;1">Invalid</DisplayString>
+    <DisplayString Condition="Value==0">Unset</DisplayString>
+    <DisplayString>{($T1)(Value&amp;~1)}</DisplayString>
+    <Expand>
+      <Item Name="Invalid">(bool)(Value&amp;1)</Item>
+      <Item Name="Val">($T1)(Value&amp;~1)</Item>
+    </Expand>
+  </Type>
+</AutoVisualizer>
diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp
index 9aed36b96ce9..25d79544b2f3 100644
--- a/compiler-rt/lib/lsan/lsan_common.cpp
+++ b/compiler-rt/lib/lsan/lsan_common.cpp
@@ -293,6 +293,27 @@ struct DirectMemoryAccessor {
   void Init(uptr begin, uptr end) {};
   void *LoadPtr(uptr p) const { return *reinterpret_cast<void **>(p); }
 };
+
+struct CopyMemoryAccessor {
+  void Init(uptr begin, uptr end) {
+    this->begin = begin;
+    buffer.clear();
+    buffer.resize(end - begin);
+    MemCpyAccessible(buffer.data(), reinterpret_cast<void *>(begin),
+                     buffer.size());
+  };
+
+  void *LoadPtr(uptr p) const {
+    uptr offset = p - begin;
+    CHECK_LE(offset + sizeof(void *), reinterpret_cast<uptr>(buffer.size()));
+    return *reinterpret_cast<void **>(offset +
+                                      reinterpret_cast<uptr>(buffer.data()));
+  }
+
+ private:
+  uptr begin;
+  InternalMmapVector<char> buffer;
+};
 }  // namespace
 
 // Scans the memory range, looking for byte patterns that point into allocator
@@ -373,10 +394,10 @@ void ScanGlobalRange(uptr begin, uptr end, Frontier *frontier) {
 }
 
 template <class Accessor>
-void ScanExtraStack(const InternalMmapVector<Range> &ranges, Frontier *frontier,
-                    Accessor &accessor) {
+void ScanRanges(const InternalMmapVector<Range> &ranges, Frontier *frontier,
+                const char *region_type, Accessor &accessor) {
   for (uptr i = 0; i < ranges.size(); i++) {
-    ScanForPointers(ranges[i].begin, ranges[i].end, frontier, "FAKE STACK",
+    ScanForPointers(ranges[i].begin, ranges[i].end, frontier, region_type,
                     kReachable, accessor);
   }
 }
@@ -384,7 +405,7 @@ void ScanExtraStack(const InternalMmapVector<Range> &ranges, Frontier *frontier,
 void ScanExtraStackRanges(const InternalMmapVector<Range> &ranges,
                           Frontier *frontier) {
   DirectMemoryAccessor accessor;
-  ScanExtraStack(ranges, frontier, accessor);
+  ScanRanges(ranges, frontier, "FAKE STACK", accessor);
 }
 
 #  if SANITIZER_FUCHSIA
@@ -478,7 +499,7 @@ static void ProcessThread(tid_t os_id, uptr sp,
     ScanForPointers(stack_begin, stack_end, frontier, "STACK", kReachable,
                     accessor);
     GetThreadExtraStackRangesLocked(os_id, &extra_ranges);
-    ScanExtraStack(extra_ranges, frontier, accessor);
+    ScanRanges(extra_ranges, frontier, "FAKE STACK", accessor);
   }
 
   if (flags()->use_tls) {
@@ -535,6 +556,7 @@ static void ProcessThread(tid_t os_id, uptr sp,
 static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
                            Frontier *frontier, tid_t caller_tid,
                            uptr caller_sp) {
+  InternalMmapVector<tid_t> done_threads;
   InternalMmapVector<uptr> registers;
   InternalMmapVector<Range> extra_ranges;
   for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
@@ -559,6 +581,25 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
 
     DirectMemoryAccessor accessor;
     ProcessThread(os_id, sp, registers, extra_ranges, frontier, accessor);
+    if (flags()->use_detached)
+      done_threads.push_back(os_id);
+  }
+
+  if (flags()->use_detached) {
+    CopyMemoryAccessor accessor;
+    InternalMmapVector<tid_t> known_threads;
+    GetRunningThreadsLocked(&known_threads);
+    Sort(done_threads.data(), done_threads.size());
+    for (tid_t os_id : known_threads) {
+      registers.clear();
+      extra_ranges.clear();
+
+      uptr i = InternalLowerBound(done_threads, os_id);
+      if (i >= done_threads.size() || done_threads[i] != os_id) {
+        uptr sp = (os_id == caller_tid) ? caller_sp : 0;
+        ProcessThread(os_id, sp, registers, extra_ranges, frontier, accessor);
+      }
+    }
   }
 
   // Add pointers reachable from ThreadContexts
diff --git a/compiler-rt/lib/lsan/lsan_flags.inc b/compiler-rt/lib/lsan/lsan_flags.inc
index c97b021ba5c0..e0b4aa4a3299 100644
--- a/compiler-rt/lib/lsan/lsan_flags.inc
+++ b/compiler-rt/lib/lsan/lsan_flags.inc
@@ -41,6 +41,8 @@ LSAN_FLAG(bool, use_ld_allocations, true,
 LSAN_FLAG(bool, use_unaligned, false, "Consider unaligned pointers valid.")
 LSAN_FLAG(bool, use_poisoned, false,
           "Consider pointers found in poisoned memory to be valid.")
+LSAN_FLAG(bool, use_detached, false,
+          "Scan threads even if attaching to them failed.")
 LSAN_FLAG(bool, log_pointers, false, "Debug logging")
 LSAN_FLAG(bool, log_threads, false, "Debug logging")
 LSAN_FLAG(int, tries, 1, "Debug option to repeat leak checking multiple times")
diff --git a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90
index 1b7ecb604ad6..765917f07d8e 100644
--- a/flang/test/Driver/msvc-dependent-lib-flags.f90
+++ b/flang/test/Driver/msvc-dependent-lib-flags.f90
@@ -1,36 +1,36 @@
-! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC
-! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG
-! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL
-! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG
-
-! MSVC: -fc1
-! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib
-! MSVC-SAME: -D_MT
-! MSVC-SAME: --dependent-lib=libcmt
-! MSVC-SAME: --dependent-lib=FortranRuntime.static.lib
-! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib
-
-! MSVC-DEBUG: -fc1
-! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib
-! MSVC-DEBUG-SAME: -D_MT
-! MSVC-DEBUG-SAME: -D_DEBUG
-! MSVC-DEBUG-SAME: --dependent-lib=libcmtd
-! MSVC-DEBUG-SAME: --dependent-lib=FortranRuntime.static_dbg.lib
-! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib
-
-! MSVC-DLL: -fc1
-! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib
-! MSVC-DLL-SAME: -D_MT
-! MSVC-DLL-SAME: -D_DLL
-! MSVC-DLL-SAME: --dependent-lib=msvcrt
-! MSVC-DLL-SAME: --dependent-lib=FortranRuntime.dynamic.lib
-! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib
-
-! MSVC-DLL-DEBUG: -fc1
-! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib
-! MSVC-DLL-DEBUG-SAME: -D_MT
-! MSVC-DLL-DEBUG-SAME: -D_DEBUG
-! MSVC-DLL-DEBUG-SAME: -D_DLL
-! MSVC-DLL-DEBUG-SAME: --dependent-lib=msvcrtd
-! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranRuntime.dynamic_dbg.lib
-! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranDecimal.dynamic_dbg.lib
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG
+
+! MSVC: -fc1
+! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib
+! MSVC-SAME: -D_MT
+! MSVC-SAME: --dependent-lib=libcmt
+! MSVC-SAME: --dependent-lib=FortranRuntime.static.lib
+! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib
+
+! MSVC-DEBUG: -fc1
+! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib
+! MSVC-DEBUG-SAME: -D_MT
+! MSVC-DEBUG-SAME: -D_DEBUG
+! MSVC-DEBUG-SAME: --dependent-lib=libcmtd
+! MSVC-DEBUG-SAME: --dependent-lib=FortranRuntime.static_dbg.lib
+! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib
+
+! MSVC-DLL: -fc1
+! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib
+! MSVC-DLL-SAME: -D_MT
+! MSVC-DLL-SAME: -D_DLL
+! MSVC-DLL-SAME: --dependent-lib=msvcrt
+! MSVC-DLL-SAME: --dependent-lib=FortranRuntime.dynamic.lib
+! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib
+
+! MSVC-DLL-DEBUG: -fc1
+! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib
+! MSVC-DLL-DEBUG-SAME: -D_MT
+! MSVC-DLL-DEBUG-SAME: -D_DEBUG
+! MSVC-DLL-DEBUG-SAME: -D_DLL
+! MSVC-DLL-DEBUG-SAME: --dependent-lib=msvcrtd
+! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranRuntime.dynamic_dbg.lib
+! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranDecimal.dynamic_dbg.lib
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index d89093b2117c..2cc54e8a4b97 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -567,7 +567,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.llogbf16
     libc.src.math.llrintf16
     libc.src.math.llroundf16
+    libc.src.math.log2f16
     libc.src.math.logbf16
+    libc.src.math.logf16
     libc.src.math.lrintf16
     libc.src.math.lroundf16
     libc.src.math.modff16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7314dbc660f3..06ea7bba81f3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -660,7 +660,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.llogbf16
     libc.src.math.llrintf16
     libc.src.math.llroundf16
+    libc.src.math.log2f16
     libc.src.math.logbf16
+    libc.src.math.logf16
     libc.src.math.lrintf16
     libc.src.math.lroundf16
     libc.src.math.modff16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 010377a90f6e..6591cbbdc155 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -310,7 +310,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | lgamma    |                  |                 |                        |                      |                        | 7.12.8.3               | F.10.5.3                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| log       | |check|          | |check|         |                        |                      |                        | 7.12.6.11              | F.10.3.11                  |
+| log       | |check|          | |check|         |                        | |check|              |                        | 7.12.6.11              | F.10.3.11                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | log10     | |check|          | |check|         |                        |                      |                        | 7.12.6.12              | F.10.3.12                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -318,7 +318,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | log1p     | |check|          | |check|         |                        |                      |                        | 7.12.6.14              | F.10.3.14                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| log2      | |check|          | |check|         |                        |                      |                        | 7.12.6.15              | F.10.3.15                  |
+| log2      | |check|          | |check|         |                        | |check|              |                        | 7.12.6.15              | F.10.3.15                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | log2p1    |                  |                 |                        |                      |                        | 7.12.6.16              | F.10.3.16                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 196dab9f81b3..d2a073847503 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -648,9 +648,11 @@ def StdC : StandardSpec<"stdc"> {
 
           FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"log2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"log2f16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"log", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"logf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"logf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"logb", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"logbf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 8427b550ab4c..516bed499b19 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -340,9 +340,11 @@ add_math_entrypoint_object(log1pf)
 
 add_math_entrypoint_object(log2)
 add_math_entrypoint_object(log2f)
+add_math_entrypoint_object(log2f16)
 
 add_math_entrypoint_object(log)
 add_math_entrypoint_object(logf)
+add_math_entrypoint_object(logf16)
 
 add_math_entrypoint_object(logb)
 add_math_entrypoint_object(logbf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 81b3e44db792..d7c7a3431d3d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2251,6 +2251,28 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  log2f16
+  SRCS
+    log2f16.cpp
+  HDRS
+    ../log2f16.h
+  DEPENDS
+    .expxf16
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
   log
   SRCS
     log.cpp
@@ -2290,6 +2312,28 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  logf16
+  SRCS
+    logf16.cpp
+  HDRS
+    ../logf16.h
+  DEPENDS
+    .expxf16
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
   logb
   SRCS
     logb.cpp
diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h
index 7202b1b11319..56ed6ee7cc00 100644
--- a/libc/src/math/generic/expxf16.h
+++ b/libc/src/math/generic/expxf16.h
@@ -288,6 +288,48 @@ template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
       lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
 }
 
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN));
+constexpr cpp::array<float, 32> LOGF_F = {
+    0x0p+0f,        0x1.f829bp-6f,  0x1.f0a30cp-5f, 0x1.6f0d28p-4f,
+    0x1.e27076p-4f, 0x1.29553p-3f,  0x1.5ff308p-3f, 0x1.9525aap-3f,
+    0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f,
+    0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f,  0x1.89a338p-2f,
+    0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f,
+    0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f,
+    0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f,
+    0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f,
+};
+
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN));
+constexpr cpp::array<float, 32> LOG2F_F = {
+    0x0p+0f,        0x1.6bad38p-5f, 0x1.663f7p-4f,  0x1.08c588p-3f,
+    0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f,
+    0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f,
+    0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f,  0x1.1bf312p-1f,
+    0x1.2b8034p-1f, 0x1.3abb4p-1f,  0x1.49a784p-1f, 0x1.584822p-1f,
+    0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f,
+    0x1.9d5dap-1f,  0x1.aa709p-1f,  0x1.b74948p-1f, 0x1.c3e9cap-1f,
+    0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f,
+};
+
+// Generated by Sollya with the following commands:
+//   > display = hexadecimal;
+//   > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN));
+constexpr cpp::array<float, 32> ONE_OVER_F_F = {
+    0x1p+0f,        0x1.f07c2p-1f,  0x1.e1e1e2p-1f, 0x1.d41d42p-1f,
+    0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f,
+    0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f,
+    0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f,
+    0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f,
+    0x1.3b13b2p-1f, 0x1.3521dp-1f,  0x1.2f684cp-1f, 0x1.29e412p-1f,
+    0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f,
+    0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f,  0x1.041042p-1f,
+};
+
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
diff --git a/libc/src/math/generic/log2f16.cpp b/libc/src/math/generic/log2f16.cpp
new file mode 100644
index 000000000000..ff4e0268b53d
--- /dev/null
+++ b/libc/src/math/generic/log2f16.cpp
@@ -0,0 +1,149 @@
+//===-- Half-precision log2(x) function -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log2f16.h"
+#include "expxf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+static constexpr size_t N_LOG2F16_EXCEPTS = 2;
+#else
+static constexpr size_t N_LOG2F16_EXCEPTS = 9;
+#endif
+
+static constexpr fputil::ExceptValues<float16, N_LOG2F16_EXCEPTS>
+    LOG2F16_EXCEPTS = {{
+// (input, RZ output, RU offset, RD offset, RN offset)
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.224p-1, log2f16(x) = -0x1.a34p-1 (RZ)
+        {0x3889U, 0xba8dU, 0U, 1U, 0U},
+        // x = 0x1.e34p-1, log2f16(x) = -0x1.558p-4 (RZ)
+        {0x3b8dU, 0xad56U, 0U, 1U, 0U},
+#endif
+        // x = 0x1.e8cp-1, log2f16(x) = -0x1.128p-4 (RZ)
+        {0x3ba3U, 0xac4aU, 0U, 1U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.f98p-1, log2f16(x) = -0x1.2ep-6 (RZ)
+        {0x3be6U, 0xa4b8U, 0U, 1U, 0U},
+        // x = 0x1.facp-1, log2f16(x) = -0x1.e7p-7 (RZ)
+        {0x3bebU, 0xa39cU, 0U, 1U, 1U},
+#endif
+        // x = 0x1.fb4p-1, log2f16(x) = -0x1.b88p-7 (RZ)
+        {0x3bedU, 0xa2e2U, 0U, 1U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.fecp-1, log2f16(x) = -0x1.cep-9 (RZ)
+        {0x3bfbU, 0x9b38U, 0U, 1U, 1U},
+        // x = 0x1.ffcp-1, log2f16(x) = -0x1.714p-11 (RZ)
+        {0x3bffU, 0x91c5U, 0U, 1U, 1U},
+        // x = 0x1.224p+0, log2f16(x) = 0x1.72cp-3 (RZ)
+        {0x3c89U, 0x31cbU, 1U, 0U, 1U},
+#endif
+    }};
+
+LLVM_LIBC_FUNCTION(float16, log2f16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+
+  // If x <= 0, or x is 1, or x is +inf, or x is NaN.
+  if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) {
+    // log2(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // log2(+/-0) = −inf
+    if ((x_u & 0x7fffU) == 0U) {
+      fputil::raise_except_if_required(FE_DIVBYZERO);
+      return FPBits::inf(Sign::NEG).get_val();
+    }
+
+    if (x_u == 0x3c00U)
+      return FPBits::zero().get_val();
+
+    // When x < 0.
+    if (x_u > 0x8000U) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+
+    // log2(+inf) = +inf
+    return FPBits::inf().get_val();
+  }
+
+  if (auto r = LOG2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // To compute log2(x), we perform the following range reduction:
+  //   x = 2^m * 1.mant,
+  //   log2(x) = m + log2(1.mant).
+  // To compute log2(1.mant), let f be the highest 6 bits including the hidden
+  // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of
+  // the mantissa, then:
+  //   log2(1.mant) = log2(f) + log2(1.mant / f)
+  //                = log2(f) + log2(1 + d/f)
+  // since d/f is sufficiently small.
+  // We store log2(f) and 1/f in the lookup tables LOG2F_F and ONE_OVER_F_F
+  // respectively.
+
+  int m = -FPBits::EXP_BIAS;
+
+  // When x is subnormal, normalize it.
+  if ((x_u & FPBits::EXP_MASK) == 0U) {
+    // Can't pass an integer to fputil::cast directly.
+    constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN;
+    x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP));
+    x_u = x_bits.uintval();
+    m -= FPBits::FRACTION_LEN;
+  }
+
+  uint16_t mant = x_bits.get_mantissa();
+  // Leading 10 - 5 = 5 bits of the mantissa.
+  int f = mant >> 5;
+  // Unbiased exponent.
+  m += x_u >> FPBits::FRACTION_LEN;
+
+  // Set bits to 1.mant instead of 2^m * 1.mant.
+  x_bits.set_biased_exponent(FPBits::EXP_BIAS);
+  float mant_f = x_bits.get_val();
+  // v = 1.mant * 1/f - 1 = d/f
+  float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
+
+  // Degree-3 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > P = fpminimax(log2(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
+  //   > x * P;
+  float log2p1_d_over_f =
+      v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f);
+  // log2(1.mant) = log2(f) + log2(1 + d/f)
+  float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
+  return fputil::cast<float16>(static_cast<float>(m) + log2_1_mant);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/logf16.cpp b/libc/src/math/generic/logf16.cpp
new file mode 100644
index 000000000000..802225a81055
--- /dev/null
+++ b/libc/src/math/generic/logf16.cpp
@@ -0,0 +1,157 @@
+//===-- Half-precision log(x) function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/logf16.h"
+#include "expxf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+static constexpr size_t N_LOGF16_EXCEPTS = 5;
+#else
+static constexpr size_t N_LOGF16_EXCEPTS = 11;
+#endif
+
+static constexpr fputil::ExceptValues<float16, N_LOGF16_EXCEPTS>
+    LOGF16_EXCEPTS = {{
+// (input, RZ output, RU offset, RD offset, RN offset)
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.61cp-13, logf16(x) = -0x1.16p+3 (RZ)
+        {0x0987U, 0xc858U, 0U, 1U, 0U},
+        // x = 0x1.f2p-12, logf16(x) = -0x1.e98p+2 (RZ)
+        {0x0fc8U, 0xc7a6U, 0U, 1U, 1U},
+#endif
+        // x = 0x1.4d4p-9, logf16(x) = -0x1.7e4p+2 (RZ)
+        {0x1935U, 0xc5f9U, 0U, 1U, 0U},
+        // x = 0x1.5ep-8, logf16(x) = -0x1.4ecp+2 (RZ)
+        {0x1d78U, 0xc53bU, 0U, 1U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.fdp-1, logf16(x) = -0x1.81p-8 (RZ)
+        {0x3bf4U, 0x9e04U, 0U, 1U, 1U},
+        // x = 0x1.fep-1, logf16(x) = -0x1.008p-8 (RZ)
+        {0x3bf8U, 0x9c02U, 0U, 1U, 0U},
+#endif
+        // x = 0x1.ffp-1, logf16(x) = -0x1.004p-9 (RZ)
+        {0x3bfcU, 0x9801U, 0U, 1U, 0U},
+        // x = 0x1.ff8p-1, logf16(x) = -0x1p-10 (RZ)
+        {0x3bfeU, 0x9400U, 0U, 1U, 1U},
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.4c4p+1, logf16(x) = 0x1.e84p-1 (RZ)
+        {0x4131U, 0x3ba1U, 1U, 0U, 1U},
+#else
+        // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
+        {0x45d4U, 0x3f0dU, 1U, 0U, 0U},
+        // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
+        {0x45d4U, 0x3f0dU, 1U, 0U, 0U},
+        // x = 0x1.d5p+9, logf16(x) = 0x1.b5cp+2 (RZ)
+        {0x6354U, 0x46d7U, 1U, 0U, 1U},
+#endif
+    }};
+
+LLVM_LIBC_FUNCTION(float16, logf16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+
+  // If x <= 0, or x is 1, or x is +inf, or x is NaN.
+  if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) {
+    // log(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // log(+/-0) = −inf
+    if ((x_u & 0x7fffU) == 0U) {
+      fputil::raise_except_if_required(FE_DIVBYZERO);
+      return FPBits::inf(Sign::NEG).get_val();
+    }
+
+    if (x_u == 0x3c00U)
+      return FPBits::zero().get_val();
+
+    // When x < 0.
+    if (x_u > 0x8000U) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+
+    // log(+inf) = +inf
+    return FPBits::inf().get_val();
+  }
+
+  if (auto r = LOGF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // To compute log(x), we perform the following range reduction:
+  //   x = 2^m * 1.mant,
+  //   log(x) = m * log(2) + log(1.mant).
+  // To compute log(1.mant), let f be the highest 6 bits including the hidden
+  // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of
+  // the mantissa, then:
+  //   log(1.mant) = log(f) + log(1.mant / f)
+  //               = log(f) + log(1 + d/f)
+  // since d/f is sufficiently small.
+  // We store log(f) and 1/f in the lookup tables LOGF_F and ONE_OVER_F_F
+  // respectively.
+
+  int m = -FPBits::EXP_BIAS;
+
+  // When x is subnormal, normalize it.
+  if ((x_u & FPBits::EXP_MASK) == 0U) {
+    // Can't pass an integer to fputil::cast directly.
+    constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN;
+    x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP));
+    x_u = x_bits.uintval();
+    m -= FPBits::FRACTION_LEN;
+  }
+
+  uint16_t mant = x_bits.get_mantissa();
+  // Leading 10 - 5 = 5 bits of the mantissa.
+  int f = mant >> 5;
+  // Unbiased exponent.
+  m += x_u >> FPBits::FRACTION_LEN;
+
+  // Set bits to 1.mant instead of 2^m * 1.mant.
+  x_bits.set_biased_exponent(FPBits::EXP_BIAS);
+  float mant_f = x_bits.get_val();
+  // v = 1.mant * 1/f - 1 = d/f
+  float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
+
+  // Degree-3 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > P = fpminimax(log(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
+  //   > x * P;
+  float log1p_d_over_f =
+      v * fputil::polyeval(v, 0x1p+0f, -0x1.001804p-1f, 0x1.557ef6p-2f);
+  // log(1.mant) = log(f) + log(1 + d/f)
+  float log_1_mant = LOGF_F[f] + log1p_d_over_f;
+  return fputil::cast<float16>(
+      fputil::multiply_add(static_cast<float>(m), LOGF_2, log_1_mant));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/log2f16.h b/libc/src/math/log2f16.h
new file mode 100644
index 000000000000..d89f9f398e2a
--- /dev/null
+++ b/libc/src/math/log2f16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for log2f16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOG2F16_H
+#define LLVM_LIBC_SRC_MATH_LOG2F16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 log2f16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_LOG2F16_H
diff --git a/libc/src/math/logf16.h b/libc/src/math/logf16.h
new file mode 100644
index 000000000000..e2d296b1d908
--- /dev/null
+++ b/libc/src/math/logf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for logf16 ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOGF16_H
+#define LLVM_LIBC_SRC_MATH_LOGF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 logf16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_LOGF16_H
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index 136db2a3773e..ecdac52e84bb 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -124,13 +124,24 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
 
       if (to_lower(cur_char) == 'x') {
         // This is a valid hex prefix.
+
+        is_number = false;
+        // A valid hex prefix is not necessarily a valid number. For the
+        // conversion to be valid it needs to use all of the characters it
+        // consumes. From the standard:
+        // 7.23.6.2 paragraph 9: "An input item is defined as the longest
+        // sequence of input characters which does not exceed any specified
+        // field width and which is, or is a prefix of, a matching input
+        // sequence."
+        // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
+        // the execution of the directive fails: this condition is a matching
+        // failure"
         base = 16;
         if (max_width > 1) {
           --max_width;
           cur_char = reader->getc();
         } else {
-          write_int_with_length(0, to_conv);
-          return READ_OK;
+          return MATCHING_FAILURE;
         }
 
       } else {
@@ -198,6 +209,9 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
   // last one back.
   reader->ungetc(cur_char);
 
+  if (!is_number)
+    return MATCHING_FAILURE;
+
   if (has_overflow) {
     write_int_with_length(MAX, to_conv);
   } else {
@@ -207,8 +221,6 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
     write_int_with_length(result, to_conv);
   }
 
-  if (!is_number)
-    return MATCHING_FAILURE;
   return READ_OK;
 }
 
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index e1a33ea326ec..bdcc22ef94e7 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -400,4 +400,17 @@ private:
   EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE(                                   \
       (expected), (actual), (expected_except), RoundingMode::TowardZero)
 
+#define EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(expected, actual,             \
+                                                 expected_except)              \
+  do {                                                                         \
+    EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST((expected), (actual),         \
+                                                 (expected_except));           \
+    EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD((expected), (actual),          \
+                                                (expected_except));            \
+    EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD((expected), (actual),        \
+                                                  (expected_except));          \
+    EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO((expected), (actual),     \
+                                                     (expected_except));       \
+  } while (0)
+
 #endif // LLVM_LIBC_TEST_UNITTEST_FPMATCHER_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 11342e6dfa04..24a5abec898a 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1773,6 +1773,17 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  logf16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    logf16_test.cpp
+  DEPENDS
+    libc.src.math.logf16
+)
+
+add_fp_unittest(
 log2_test
  NEED_MPFR
  SUITE
@@ -1799,6 +1810,17 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  log2f16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    log2f16_test.cpp
+  DEPENDS
+    libc.src.math.log2f16
+)
+
+add_fp_unittest(
  log10_test
  NEED_MPFR
  SUITE
diff --git a/libc/test/src/math/log2f16_test.cpp b/libc/test/src/math/log2f16_test.cpp
new file mode 100644
index 000000000000..6630ca877d8d
--- /dev/null
+++ b/libc/test/src/math/log2f16_test.cpp
@@ -0,0 +1,40 @@
+//===-- Exhaustive test for log2f16 ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log2f16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcLog2f16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf];
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0];
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcLog2f16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
+                                   LIBC_NAMESPACE::log2f16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcLog2f16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
+                                   LIBC_NAMESPACE::log2f16(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/logf16_test.cpp b/libc/test/src/math/logf16_test.cpp
new file mode 100644
index 000000000000..922918b092b2
--- /dev/null
+++ b/libc/test/src/math/logf16_test.cpp
@@ -0,0 +1,40 @@
+//===-- Exhaustive test for logf16 ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/logf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcLogf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf];
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0];
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcLogf16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log, x,
+                                   LIBC_NAMESPACE::logf16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcLogf16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log, x,
+                                   LIBC_NAMESPACE::logf16(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 899c9d2df453..3c077240356b 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3559,6 +3559,19 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  logf16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    logf16_test.cpp
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.errno.errno
+    libc.src.math.logf16
+    libc.src.__support.FPUtil.cast
+)
+
+add_fp_unittest(
   log2_test
   SUITE
     libc-math-smoke-tests
@@ -3583,6 +3596,19 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  log2f16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    log2f16_test.cpp
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.errno.errno
+    libc.src.math.log2f16
+    libc.src.__support.FPUtil.cast
+)
+
+add_fp_unittest(
   log10_test
   SUITE
     libc-math-smoke-tests
diff --git a/libc/test/src/math/smoke/log2f16_test.cpp b/libc/test/src/math/smoke/log2f16_test.cpp
new file mode 100644
index 000000000000..6d98482aa449
--- /dev/null
+++ b/libc/test/src/math/smoke/log2f16_test.cpp
@@ -0,0 +1,50 @@
+//===-- Unittests for log2f16 ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/log2f16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcLog2f16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcLog2f16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log2f16(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::log2f16(sNaN), FE_INVALID);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::log2f16(inf));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log2f16(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(
+      neg_inf, LIBC_NAMESPACE::log2f16(zero), FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(
+      neg_inf, LIBC_NAMESPACE::log2f16(neg_zero), FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(
+      zero,
+      LIBC_NAMESPACE::log2f16(LIBC_NAMESPACE::fputil::cast<float16>(1.0)));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(
+      aNaN,
+      LIBC_NAMESPACE::log2f16(LIBC_NAMESPACE::fputil::cast<float16>(-1.0)));
+  EXPECT_MATH_ERRNO(EDOM);
+}
diff --git a/libc/test/src/math/smoke/logf16_test.cpp b/libc/test/src/math/smoke/logf16_test.cpp
new file mode 100644
index 000000000000..c7232aa1c1e3
--- /dev/null
+++ b/libc/test/src/math/smoke/logf16_test.cpp
@@ -0,0 +1,49 @@
+//===-- Unittests for logf16 ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/logf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcLogf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcLogf16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::logf16(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::logf16(sNaN), FE_INVALID);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::logf16(inf));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::logf16(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(
+      neg_inf, LIBC_NAMESPACE::logf16(zero), FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(
+      neg_inf, LIBC_NAMESPACE::logf16(neg_zero), FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(
+      zero, LIBC_NAMESPACE::logf16(LIBC_NAMESPACE::fputil::cast<float16>(1.0)));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(
+      aNaN,
+      LIBC_NAMESPACE::logf16(LIBC_NAMESPACE::fputil::cast<float16>(-1.0)));
+  EXPECT_MATH_ERRNO(EDOM);
+}
diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp
index 33bb0acba3e6..18addb632067 100644
--- a/libc/test/src/stdio/sscanf_test.cpp
+++ b/libc/test/src/stdio/sscanf_test.cpp
@@ -177,13 +177,25 @@ TEST(LlvmLibcSScanfTest, IntConvMaxLengthTests) {
   EXPECT_EQ(ret_val, 1);
   EXPECT_EQ(result, 0);
 
+  result = -999;
+
+  // 0x is a valid prefix, but not a valid number. This should be a matching
+  // failure and should not modify the values.
   ret_val = LIBC_NAMESPACE::sscanf("0x1", "%2i", &result);
-  EXPECT_EQ(ret_val, 1);
-  EXPECT_EQ(result, 0);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -999);
 
   ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%3i", &result);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -999);
+
+  ret_val = LIBC_NAMESPACE::sscanf("0x1", "%3i", &result);
   EXPECT_EQ(ret_val, 1);
-  EXPECT_EQ(result, 0);
+  EXPECT_EQ(result, 1);
+
+  ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%4i", &result);
+  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(result, -1);
 
   ret_val = LIBC_NAMESPACE::sscanf("-0x123", "%4i", &result);
   EXPECT_EQ(ret_val, 1);
@@ -212,7 +224,7 @@ TEST(LlvmLibcSScanfTest, IntConvNoWriteTests) {
   EXPECT_EQ(result, 0);
 
   ret_val = LIBC_NAMESPACE::sscanf("0x1", "%*2i", &result);
-  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(ret_val, 0);
   EXPECT_EQ(result, 0);
 
   ret_val = LIBC_NAMESPACE::sscanf("a", "%*i", &result);
@@ -679,13 +691,17 @@ TEST(LlvmLibcSScanfTest, CombinedConv) {
   EXPECT_EQ(result, 123);
   ASSERT_STREQ(buffer, "abc");
 
+  result = -1;
+
+  // 0x is a valid prefix, but not a valid number. This should be a matching
+  // failure and should not modify the values.
   ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%i%s", &result, buffer);
-  EXPECT_EQ(ret_val, 2);
-  EXPECT_EQ(result, 0);
-  ASSERT_STREQ(buffer, "ZZZ");
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -1);
+  ASSERT_STREQ(buffer, "abc");
 
   ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%X%s", &result, buffer);
-  EXPECT_EQ(ret_val, 2);
-  EXPECT_EQ(result, 0);
-  ASSERT_STREQ(buffer, "ZZZ");
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -1);
+  ASSERT_STREQ(buffer, "abc");
 }
diff --git a/lld/test/MachO/objc-category-merging-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s
index 437294791bf3..88c175333f26 100644
--- a/lld/test/MachO/objc-category-merging-minimal.s
+++ b/lld/test/MachO/objc-category-merging-minimal.s
@@ -30,7 +30,7 @@
 
 ############ Test merging skipped due to invalid category name ############
 # Modify __OBJC_$_CATEGORY_MyBaseClass_$_Category01's name to point to L_OBJC_IMAGE_INFO+3
-# RUN: sed -E '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { n; s/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/\t.quad\tL_OBJC_IMAGE_INFO+3/}' merge_cat_minimal.s > merge_cat_minimal_bad_name.s
+# RUN: awk '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { print; getline; sub(/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/, "\t.quad\tL_OBJC_IMAGE_INFO+3"); print; next } { print }' merge_cat_minimal.s > merge_cat_minimal_bad_name.s
 
 # Assemble the modified source
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal_bad_name.o merge_cat_minimal_bad_name.s
diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst
index b91077d66089..e2c15d872b4b 100644
--- a/lldb/docs/index.rst
+++ b/lldb/docs/index.rst
@@ -134,6 +134,7 @@ interesting areas to contribute to lldb.
    use/intel_pt
    use/ondemand
    use/aarch64-linux
+   use/symbolfilejson
    use/troubleshooting
    use/links
    Man Page <man/lldb>
diff --git a/lldb/docs/use/symbolfilejson.rst b/lldb/docs/use/symbolfilejson.rst
new file mode 100644
index 000000000000..9d15d704f357
--- /dev/null
+++ b/lldb/docs/use/symbolfilejson.rst
@@ -0,0 +1,204 @@
+JSON Symbol File Format
+=======================
+
+The JSON symbol file format encodes symbols in a text based, human readable
+format. JSON symbol files can be used to symbolicate programs that lack symbol
+information, for example because they have been stripped.
+
+Under the hood, the JSON symbol file format is also used by the crashlog
+script, specifically to provide symbol information for interactive crashlogs.
+
+Format
+------
+
+The symbol file consists of a single JSON object with the following top level
+keys:
+
+* ``triple`` (string)
+* ``uuid`` (string)
+* ``type`` (string, optional)
+* ``sections`` (array, optional)
+* ``symbols`` (array, optional)
+
+The ``triple``, ``uuid`` and ``type`` form the header and should therefore come
+first. The ``type`` field is optional. The body consists ``sections`` and
+``symbols``. Both arrays are optional, and can be omitted and are allowed to be
+empty.
+
+triple
+``````
+
+The triple is a string with the triple of the object file it corresponds to.
+The triple follows the same format as used by LLVM:
+``<arch><sub>-<vendor>-<sys>-<env>``.
+
+.. code-block:: JSON
+
+  { "triple": "arm64-apple-darwin22.0.0" }
+
+uuid
+````
+
+The UUID is a string with the textual representation of the UUID of the object
+file it corresponds to. The UUID is represented as outlined in RFC 4122: with
+32 hexadecimal digits, displayed in five groups separated by hyphens, in the
+form 8-4-4-4-12 for a total of 36 characters (32 alphanumeric characters and
+four hyphens).
+
+.. code-block:: JSON
+
+  { "uuid": "2107157B-6D7E-39F6-806D-AECDC15FC533" }
+
+type
+````
+The optional ``type`` field allows you to specify the type of object file the
+JSON file represent. This is often unnecessary, and can be omitted, in which
+case the file is considered of the type ``DebugInfo``.
+
+Valid values for the ``type`` field are:
+
+* ``corefile``: A core file that has a checkpoint of a program's execution state.
+* ``executable``: A normal executable.
+* ``debuginfo``: An object file that contains only debug information.
+* ``dynamiclinker``: The platform's dynamic linker executable.
+* ``objectfile``: An intermediate object file.
+* ``sharedlibrary``: A shared library that can be used during execution.
+* ``stublibrary``: A library that can be linked against but not used for execution.
+* ``jit``: JIT code that has symbols, sections and possibly debug info.
+
+
+sections
+````````
+
+* ``name``: a string representing the section name.
+* ``type``: a string representing the section type (see below).
+* ``address``: a number representing the section file address.
+* ``size``: a number representing the section size in bytes.
+
+.. code-block:: JSON
+
+  {
+      "name": "__TEXT",
+      "type": "code",
+      "address": 0,
+      "size": 546,
+  }
+
+The ``type`` field accepts the following values: ``code``, ``container``,
+``data``, ``debug``.
+
+symbols
+```````
+
+Symbols are JSON objects with the following keys:
+
+* ``name``: a string representing the string name.
+* ``value``: a number representing the symbol value.
+* ``address``: a number representing the symbol address in a section.
+* ``size``: a number representing the symbol size.
+* ``type``: an optional string representing the symbol type (see below).
+
+A symbol must contain either a ``value`` or an ``address``. The ``type`` is
+optional.
+
+.. code-block:: JSON
+
+  {
+      "name": "foo",
+      "type": "code",
+      "size": 10,
+      "address": 4294983544,
+  }
+
+The ``type`` field accepts any type in the ``lldb::SymbolType`` enum in
+`lldb-enumerations.h <https://lldb.llvm.org/cpp_reference/lldb-enumerations_8h.html>`_
+, without the ``eSymbolType``. For example ``code`` maps to ``eSymbolTypeCode``
+and ``variableType`` to ``eSymbolTypeVariableType``.
+
+Usage
+-----
+
+Symbol files can be added with the ``target symbol add`` command. The triple
+and UUID will be used to match it to the correct module.
+
+.. code-block:: shell
+
+  (lldb) target symbol add /path/to/symbol.json
+  symbol file '/path/to/symbol.json' has been added to '/path/to/executable'
+
+You can use ``image list`` to confirm that the symbol file has been associated
+with the module.
+
+.. code-block:: shell
+
+  (lldb) image list
+  [  0] A711AB38-1FB1-38B1-B38B-859352ED2A20 0x0000000100000000 /path/to/executable
+        /path/to/symbol.json
+  [  1] 4BF76A72-53CC-3E42-8945-4E314C101535 0x00000001800c6000 /usr/lib/dyld
+
+
+Example
+-------
+
+The simplest valid JSON symbol file consists of just a triple and UUID:
+
+.. code-block:: JSON
+
+  {
+    "triple": "arm64-apple-macosx15.0.0",
+    "uuid": "A711AB38-1FB1-38B1-B38B-859352ED2A20"
+  }
+
+A JSON symbol file with symbols for ``main``, ``foo``, and ``bar``.
+
+.. code-block:: JSON
+
+  {
+      "triple": "arm64-apple-macosx15.0.0",
+      "uuid": "321C6225-2378-3E6D-B6C1-6374DEC6D81A",
+      "symbols": [
+          {
+              "name": "main",
+              "type": "code",
+              "size": 32,
+              "address": 4294983552
+          },
+          {
+              "name": "foo",
+              "type": "code",
+              "size": 8,
+              "address": 4294983544
+          },
+          {
+              "name": "bar",
+              "type": "code",
+              "size": 0,
+              "value": 255
+          }
+      ]
+  }
+
+A symbol file with a symbol ``foo`` belonging to the ``__TEXT`` section.
+
+.. code-block:: JSON
+
+  {
+      "triple": "arm64-apple-macosx15.0.0",
+      "uuid": "58489DB0-F9FF-4E62-ABD1-A7CCE5DFB879",
+      "type": "sharedlibrary",
+      "sections": [
+          {
+              "name": "__TEXT",
+              "type": "code",
+              "address": 0,
+              "size": 546
+          }
+      ],
+      "symbols": [
+          {
+              "name": "foo",
+              "address": 256,
+              "size": 17
+          }
+      ]
+  }
diff --git a/lldb/test/API/commands/expression/ir-interpreter-phi-nodes/Makefile b/lldb/test/API/commands/expression/ir-interpreter-phi-nodes/Makefile
index d420a34c03e7..a1f689e07c77 100644
--- a/lldb/test/API/commands/expression/ir-interpreter-phi-nodes/Makefile
+++ b/lldb/test/API/commands/expression/ir-interpreter-phi-nodes/Makefile
@@ -1,4 +1,4 @@
-
-CXX_SOURCES := main.cpp
-
-include Makefile.rules
+
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/settings/use_source_cache/TestUseSourceCache.py b/lldb/test/API/commands/settings/use_source_cache/TestUseSourceCache.py
index 421599080a9e..8425ab09ab9d 100644
--- a/lldb/test/API/commands/settings/use_source_cache/TestUseSourceCache.py
+++ b/lldb/test/API/commands/settings/use_source_cache/TestUseSourceCache.py
@@ -18,9 +18,8 @@ class SettingsUseSourceCacheTestCase(TestBase):
         self.set_use_source_cache_and_test(False)
 
     @skipIf(hostoslist=no_match(["windows"]))
-    @skipIf(oslist=["windows"])  # Fails on windows 11
     def test_set_use_source_cache_true(self):
-        """Test that after 'set use-source-cache false', files are locked."""
+        """Test that after 'set use-source-cache true', files are locked."""
         self.set_use_source_cache_and_test(True)
 
     def set_use_source_cache_and_test(self, is_cache_enabled):
@@ -46,23 +45,27 @@ class SettingsUseSourceCacheTestCase(TestBase):
         # Show the source file contents to make sure LLDB loads src file.
         self.runCmd("source list")
 
-        # Try deleting the source file.
-        is_file_removed = self.removeFile(src)
+        # Try overwriting the source file.
+        is_file_overwritten = self.overwriteFile(src)
 
         if is_cache_enabled:
             self.assertFalse(
-                is_file_removed, "Source cache is enabled, but delete file succeeded"
+                is_file_overwritten,
+                "Source cache is enabled, but writing to file succeeded",
             )
 
         if not is_cache_enabled:
             self.assertTrue(
-                is_file_removed, "Source cache is disabled, but delete file failed"
+                is_file_overwritten,
+                "Source cache is disabled, but writing to file failed",
             )
 
-    def removeFile(self, src):
-        """Remove file and return true iff file was successfully removed."""
+    def overwriteFile(self, src):
+        """Write to file and return true iff file was successfully written."""
         try:
-            os.remove(src)
+            f = open(src, "w")
+            f.writelines(["// hello world\n"])
+            f.close()
             return True
         except Exception:
             return False
diff --git a/lldb/test/API/functionalities/postmortem/minidump/fizzbuzz.syms b/lldb/test/API/functionalities/postmortem/minidump/fizzbuzz.syms
index e817a491af57..cab06c1c9d50 100644
--- a/lldb/test/API/functionalities/postmortem/minidump/fizzbuzz.syms
+++ b/lldb/test/API/functionalities/postmortem/minidump/fizzbuzz.syms
@@ -1,2 +1,2 @@
-MODULE windows x86 0F45B7919A9646F9BF8F2D6076EA421A11 fizzbuzz.pdb
-PUBLIC 1000 0 main
+MODULE windows x86 0F45B7919A9646F9BF8F2D6076EA421A11 fizzbuzz.pdb
+PUBLIC 1000 0 main
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile
index 745f6cc9d65a..e3b48697fd78 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile
@@ -1,23 +1,23 @@
-CXX_SOURCES := main.cpp
-LD_EXTRAS := -L. -l_d -l_c -l_a -l_b
-
-a.out: lib_b lib_a lib_c lib_d
-
-include Makefile.rules
-
-lib_a: lib_b
-	"$(MAKE)" -f $(MAKEFILE_RULES) \
-		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=a.cpp DYLIB_NAME=_a \
-		LD_EXTRAS="-L. -l_b"
-
-lib_b:
-	"$(MAKE)" -f $(MAKEFILE_RULES) \
-		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=b.cpp DYLIB_NAME=_b
-
-lib_c:
-	"$(MAKE)" -f $(MAKEFILE_RULES) \
-		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=c.cpp DYLIB_NAME=_c
-
-lib_d:
-	"$(MAKE)" -f $(MAKEFILE_RULES) \
-		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=d.cpp DYLIB_NAME=_d
+CXX_SOURCES := main.cpp
+LD_EXTRAS := -L. -l_d -l_c -l_a -l_b
+
+a.out: lib_b lib_a lib_c lib_d
+
+include Makefile.rules
+
+lib_a: lib_b
+	"$(MAKE)" -f $(MAKEFILE_RULES) \
+		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=a.cpp DYLIB_NAME=_a \
+		LD_EXTRAS="-L. -l_b"
+
+lib_b:
+	"$(MAKE)" -f $(MAKEFILE_RULES) \
+		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=b.cpp DYLIB_NAME=_b
+
+lib_c:
+	"$(MAKE)" -f $(MAKEFILE_RULES) \
+		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=c.cpp DYLIB_NAME=_c
+
+lib_d:
+	"$(MAKE)" -f $(MAKEFILE_RULES) \
+		DYLIB_ONLY=YES DYLIB_CXX_SOURCES=d.cpp DYLIB_NAME=_d
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp
index 66633b70ee1e..778b46ed5cef 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp
@@ -1,3 +1,3 @@
-extern "C" int b_function();
-
-extern "C" int a_function() { return b_function(); }
+extern "C" int b_function();
+
+extern "C" int a_function() { return b_function(); }
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp
index 8b16fbdb5728..4f1a4032ee0e 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp
@@ -1 +1 @@
-extern "C" int b_function() { return 500; }
+extern "C" int b_function() { return 500; }
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp
index 120c88f2bb60..8abd1b155a75 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp
@@ -1 +1 @@
-extern "C" int c_function() { return 600; }
+extern "C" int c_function() { return 600; }
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp
index d37ad2621ae4..58888a29ba32 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp
@@ -1 +1 @@
-extern "C" int d_function() { return 700; }
+extern "C" int d_function() { return 700; }
diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp
index bd2c79cdab9d..77b38c5ccdc6 100644
--- a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp
+++ b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp
@@ -1,16 +1,16 @@
-#include <stdio.h>
-
-extern "C" int a_function();
-extern "C" int c_function();
-extern "C" int b_function();
-extern "C" int d_function();
-
-int main() {
-  a_function();
-  b_function();
-  c_function();
-  d_function();
-
-  puts("running"); // breakpoint here
-  return 0;
-}
+#include <stdio.h>
+
+extern "C" int a_function();
+extern "C" int c_function();
+extern "C" int b_function();
+extern "C" int d_function();
+
+int main() {
+  a_function();
+  b_function();
+  c_function();
+  d_function();
+
+  puts("running"); // breakpoint here
+  return 0;
+}
diff --git a/lldb/test/API/functionalities/unwind/zeroth_frame/Makefile b/lldb/test/API/functionalities/unwind/zeroth_frame/Makefile
index 10495940055b..15a931850e17 100644
--- a/lldb/test/API/functionalities/unwind/zeroth_frame/Makefile
+++ b/lldb/test/API/functionalities/unwind/zeroth_frame/Makefile
@@ -1,3 +1,3 @@
-C_SOURCES := main.c
-
-include Makefile.rules
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py
index 70f72c72c834..d660844405e1 100644
--- a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py
+++ b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py
@@ -1,88 +1,88 @@
-"""
-Test that line information is recalculated properly for a frame when it moves
-from the middle of the backtrace to a zero index.
-
-This is a regression test for a StackFrame bug, where whether frame is zero or
-not depends on an internal field. When LLDB was updating its frame list value
-of the field wasn't copied into existing StackFrame instances, so those
-StackFrame instances, would use an incorrect line entry evaluation logic in
-situations if it was in the middle of the stack frame list (not zeroth), and
-then moved to the top position. The difference in logic is that for zeroth
-frames line entry is returned for program counter, while for other frame
-(except for those that "behave like zeroth") it is for the instruction
-preceding PC, as PC points to the next instruction after function call. When
-the bug is present, when execution stops at the second breakpoint
-SBFrame.GetLineEntry() returns line entry for the previous line, rather than
-the one with a breakpoint. Note that this is specific to
-SBFrame.GetLineEntry(), SBFrame.GetPCAddress().GetLineEntry() would return
-correct entry.
-
-This bug doesn't reproduce through an LLDB interpretator, however it happens
-when using API directly, for example in LLDB-MI.
-"""
-
-import lldb
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
-from lldbsuite.test import lldbutil
-
-
-class ZerothFrame(TestBase):
-    def test(self):
-        """
-        Test that line information is recalculated properly for a frame when it moves
-        from the middle of the backtrace to a zero index.
-        """
-        self.build()
-        self.setTearDownCleanup()
-
-        exe = self.getBuildArtifact("a.out")
-        target = self.dbg.CreateTarget(exe)
-        self.assertTrue(target, VALID_TARGET)
-
-        main_dot_c = lldb.SBFileSpec("main.c")
-        bp1 = target.BreakpointCreateBySourceRegex(
-            "// Set breakpoint 1 here", main_dot_c
-        )
-        bp2 = target.BreakpointCreateBySourceRegex(
-            "// Set breakpoint 2 here", main_dot_c
-        )
-
-        process = target.LaunchSimple(None, None, self.get_process_working_directory())
-        self.assertTrue(process, VALID_PROCESS)
-
-        thread = self.thread()
-
-        if self.TraceOn():
-            print("Backtrace at the first breakpoint:")
-            for f in thread.frames:
-                print(f)
-
-        # Check that we have stopped at correct breakpoint.
-        self.assertEqual(
-            thread.frame[0].GetLineEntry().GetLine(),
-            bp1.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(),
-            "LLDB reported incorrect line number.",
-        )
-
-        # Important to use SBProcess::Continue() instead of
-        # self.runCmd('continue'), because the problem doesn't reproduce with
-        # 'continue' command.
-        process.Continue()
-
-        if self.TraceOn():
-            print("Backtrace at the second breakpoint:")
-            for f in thread.frames:
-                print(f)
-        # Check that we have stopped at the breakpoint
-        self.assertEqual(
-            thread.frame[0].GetLineEntry().GetLine(),
-            bp2.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(),
-            "LLDB reported incorrect line number.",
-        )
-        # Double-check with GetPCAddress()
-        self.assertEqual(
-            thread.frame[0].GetLineEntry().GetLine(),
-            thread.frame[0].GetPCAddress().GetLineEntry().GetLine(),
-            "LLDB reported incorrect line number.",
-        )
+"""
+Test that line information is recalculated properly for a frame when it moves
+from the middle of the backtrace to a zero index.
+
+This is a regression test for a StackFrame bug, where whether frame is zero or
+not depends on an internal field. When LLDB was updating its frame list value
+of the field wasn't copied into existing StackFrame instances, so those
+StackFrame instances, would use an incorrect line entry evaluation logic in
+situations if it was in the middle of the stack frame list (not zeroth), and
+then moved to the top position. The difference in logic is that for zeroth
+frames line entry is returned for program counter, while for other frame
+(except for those that "behave like zeroth") it is for the instruction
+preceding PC, as PC points to the next instruction after function call. When
+the bug is present, when execution stops at the second breakpoint
+SBFrame.GetLineEntry() returns line entry for the previous line, rather than
+the one with a breakpoint. Note that this is specific to
+SBFrame.GetLineEntry(), SBFrame.GetPCAddress().GetLineEntry() would return
+correct entry.
+
+This bug doesn't reproduce through an LLDB interpretator, however it happens
+when using API directly, for example in LLDB-MI.
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class ZerothFrame(TestBase):
+    def test(self):
+        """
+        Test that line information is recalculated properly for a frame when it moves
+        from the middle of the backtrace to a zero index.
+        """
+        self.build()
+        self.setTearDownCleanup()
+
+        exe = self.getBuildArtifact("a.out")
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
+        main_dot_c = lldb.SBFileSpec("main.c")
+        bp1 = target.BreakpointCreateBySourceRegex(
+            "// Set breakpoint 1 here", main_dot_c
+        )
+        bp2 = target.BreakpointCreateBySourceRegex(
+            "// Set breakpoint 2 here", main_dot_c
+        )
+
+        process = target.LaunchSimple(None, None, self.get_process_working_directory())
+        self.assertTrue(process, VALID_PROCESS)
+
+        thread = self.thread()
+
+        if self.TraceOn():
+            print("Backtrace at the first breakpoint:")
+            for f in thread.frames:
+                print(f)
+
+        # Check that we have stopped at correct breakpoint.
+        self.assertEqual(
+            thread.frame[0].GetLineEntry().GetLine(),
+            bp1.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(),
+            "LLDB reported incorrect line number.",
+        )
+
+        # Important to use SBProcess::Continue() instead of
+        # self.runCmd('continue'), because the problem doesn't reproduce with
+        # 'continue' command.
+        process.Continue()
+
+        if self.TraceOn():
+            print("Backtrace at the second breakpoint:")
+            for f in thread.frames:
+                print(f)
+        # Check that we have stopped at the breakpoint
+        self.assertEqual(
+            thread.frame[0].GetLineEntry().GetLine(),
+            bp2.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(),
+            "LLDB reported incorrect line number.",
+        )
+        # Double-check with GetPCAddress()
+        self.assertEqual(
+            thread.frame[0].GetLineEntry().GetLine(),
+            thread.frame[0].GetPCAddress().GetLineEntry().GetLine(),
+            "LLDB reported incorrect line number.",
+        )
diff --git a/lldb/test/API/python_api/debugger/Makefile b/lldb/test/API/python_api/debugger/Makefile
index 99998b20bcb0..bfad5f33e867 100644
--- a/lldb/test/API/python_api/debugger/Makefile
+++ b/lldb/test/API/python_api/debugger/Makefile
@@ -1,3 +1,3 @@
-CXX_SOURCES := main.cpp
-
-include Makefile.rules
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
index 9ab4619b1f8f..04e807c5c620 100644
--- a/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
+++ b/lldb/test/API/python_api/find_in_memory/TestFindInMemory.py
@@ -55,7 +55,7 @@ class FindInMemoryTestCase(TestBase):
         error = lldb.SBError()
         addr = self.process.FindInMemory(
             SINGLE_INSTANCE_PATTERN_STACK,
-            GetStackRange(self),
+            GetStackRange(self, True),
             1,
             error,
         )
@@ -70,7 +70,7 @@ class FindInMemoryTestCase(TestBase):
         error = lldb.SBError()
         addr = self.process.FindInMemory(
             DOUBLE_INSTANCE_PATTERN_HEAP,
-            GetHeapRanges(self)[0],
+            GetHeapRanges(self, True)[0],
             1,
             error,
         )
@@ -86,7 +86,7 @@ class FindInMemoryTestCase(TestBase):
         error = lldb.SBError()
         addr = self.process.FindInMemory(
             SINGLE_INSTANCE_PATTERN_STACK,
-            GetStackRange(self),
+            GetStackRange(self, True),
             0,
             error,
         )
@@ -118,7 +118,7 @@ class FindInMemoryTestCase(TestBase):
         error = lldb.SBError()
         addr = self.process.FindInMemory(
             "",
-            GetStackRange(self),
+            GetStackRange(self, True),
             1,
             error,
         )
@@ -131,7 +131,7 @@ class FindInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
         error = lldb.SBError()
-        range = GetAlignedRange(self)
+        range = GetAlignedRange(self, True)
 
         # First we make sure the pattern is found with alignment 1
         addr = self.process.FindInMemory(
diff --git a/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py b/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py
index 31bc0e99f491..895c527430f2 100644
--- a/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py
+++ b/lldb/test/API/python_api/find_in_memory/TestFindRangesInMemory.py
@@ -30,7 +30,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetHeapRanges(self)
+        addr_ranges = GetHeapRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             DOUBLE_INSTANCE_PATTERN_HEAP,
@@ -48,7 +48,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetStackRanges(self)
+        addr_ranges = GetStackRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             SINGLE_INSTANCE_PATTERN_STACK,
@@ -66,7 +66,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetRanges(self)
+        addr_ranges = GetRanges(self, True)
         addr_ranges.Append(lldb.SBAddressRange())
         self.assertGreater(addr_ranges.GetSize(), 2)
         error = lldb.SBError()
@@ -86,7 +86,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetHeapRanges(self)
+        addr_ranges = GetHeapRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             DOUBLE_INSTANCE_PATTERN_HEAP,
@@ -104,7 +104,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetHeapRanges(self)
+        addr_ranges = GetHeapRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             DOUBLE_INSTANCE_PATTERN_HEAP,
@@ -160,7 +160,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetHeapRanges(self)
+        addr_ranges = GetHeapRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             "",
@@ -178,7 +178,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertTrue(self.process, PROCESS_IS_VALID)
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
-        addr_ranges = GetHeapRanges(self)
+        addr_ranges = GetHeapRanges(self, True)
         error = lldb.SBError()
         matches = self.process.FindRangesInMemory(
             DOUBLE_INSTANCE_PATTERN_HEAP,
@@ -197,7 +197,7 @@ class FindRangesInMemoryTestCase(TestBase):
         self.assertState(self.process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
 
         addr_ranges = lldb.SBAddressRangeList()
-        addr_ranges.Append(GetAlignedRange(self))
+        addr_ranges.Append(GetAlignedRange(self, True))
         error = lldb.SBError()
 
         matches = self.process.FindRangesInMemory(
diff --git a/lldb/test/API/python_api/find_in_memory/address_ranges_helper.py b/lldb/test/API/python_api/find_in_memory/address_ranges_helper.py
index 810fb9fee386..dcceca6d8a5c 100644
--- a/lldb/test/API/python_api/find_in_memory/address_ranges_helper.py
+++ b/lldb/test/API/python_api/find_in_memory/address_ranges_helper.py
@@ -6,27 +6,30 @@ ALIGNED_INSTANCE_PATTERN_HEAP = "i_am_unaligned_string_on_the_heap"
 UNALIGNED_INSTANCE_PATTERN_HEAP = ALIGNED_INSTANCE_PATTERN_HEAP[1:]
 
 
-def GetAlignedRange(test_base):
+def GetAlignedRange(test_base, shrink=False):
     frame = test_base.thread.GetSelectedFrame()
     ex = frame.EvaluateExpression("aligned_string_ptr")
     test_base.assertTrue(ex.IsValid())
-    return GetRangeFromAddrValue(test_base, ex)
+    return GetRangeFromAddrValue(test_base, ex, shrink)
 
 
-def GetStackRange(test_base):
+def GetStackRange(test_base, shrink=False):
     frame = test_base.thread.GetSelectedFrame()
     ex = frame.EvaluateExpression("&stack_pointer")
     test_base.assertTrue(ex.IsValid())
-    return GetRangeFromAddrValue(test_base, ex)
+    return GetRangeFromAddrValue(test_base, ex, shrink)
 
 
-def GetStackRanges(test_base):
+def GetStackRanges(test_base, shrink=False):
     addr_ranges = lldb.SBAddressRangeList()
     addr_ranges.Append(GetStackRange(test_base))
     return addr_ranges
 
 
-def GetRangeFromAddrValue(test_base, addr):
+def GetRangeFromAddrValue(test_base, addr, shrink=False):
+    """Returns a memory region containing 'addr'.
+    If 'shrink' is True, the address range will be reduced to not exceed 2K.
+    """
     region = lldb.SBMemoryRegionInfo()
     test_base.assertTrue(
         test_base.process.GetMemoryRegionInfo(
@@ -37,37 +40,48 @@ def GetRangeFromAddrValue(test_base, addr):
     test_base.assertTrue(region.IsReadable())
     test_base.assertFalse(region.IsExecutable())
 
-    address_start = lldb.SBAddress(region.GetRegionBase(), test_base.target)
-    stack_size = region.GetRegionEnd() - region.GetRegionBase()
-    return lldb.SBAddressRange(address_start, stack_size)
+    base = region.GetRegionBase()
+    end = region.GetRegionEnd()
 
+    if shrink:
+        addr2 = addr.GetValueAsUnsigned()
+        addr2 -= addr2 % 512
+        base = max(base, addr2 - 1024)
+        end = min(end, addr2 + 1024)
 
-def IsWithinRange(addr, range, target):
+    start = lldb.SBAddress(base, test_base.target)
+    size = end - base
+
+    return lldb.SBAddressRange(start, size)
+
+
+def IsWithinRange(addr, size, range, target):
     start_addr = range.GetBaseAddress().GetLoadAddress(target)
     end_addr = start_addr + range.GetByteSize()
     addr = addr.GetValueAsUnsigned()
-    return addr >= start_addr and addr < end_addr
+    return addr >= start_addr and addr + size <= end_addr
 
 
-def GetHeapRanges(test_base):
+def GetHeapRanges(test_base, shrink=False):
     frame = test_base.thread.GetSelectedFrame()
 
     ex = frame.EvaluateExpression("heap_pointer1")
     test_base.assertTrue(ex.IsValid())
-    range = GetRangeFromAddrValue(test_base, ex)
+    range = GetRangeFromAddrValue(test_base, ex, shrink)
     addr_ranges = lldb.SBAddressRangeList()
     addr_ranges.Append(range)
 
     ex = frame.EvaluateExpression("heap_pointer2")
     test_base.assertTrue(ex.IsValid())
-    if not IsWithinRange(ex, addr_ranges[0], test_base.target):
-        addr_ranges.Append(GetRangeFromAddrValue(test_base, ex))
+    size = len(DOUBLE_INSTANCE_PATTERN_HEAP)
+    if not IsWithinRange(ex, size, addr_ranges[0], test_base.target):
+        addr_ranges.Append(GetRangeFromAddrValue(test_base, ex, shrink))
 
     return addr_ranges
 
 
-def GetRanges(test_base):
-    ranges = GetHeapRanges(test_base)
-    ranges.Append(GetStackRanges(test_base))
+def GetRanges(test_base, shrink=False):
+    ranges = GetHeapRanges(test_base, shrink)
+    ranges.Append(GetStackRanges(test_base, shrink))
 
     return ranges
diff --git a/lldb/test/Shell/BuildScript/modes.test b/lldb/test/Shell/BuildScript/modes.test
index 1ce50104855f..02311f712d77 100644
--- a/lldb/test/Shell/BuildScript/modes.test
+++ b/lldb/test/Shell/BuildScript/modes.test
@@ -1,35 +1,35 @@
-RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any -o %t/foo.out foobar.c \
-RUN:    | FileCheck --check-prefix=COMPILE %s
-
-RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any --outdir %t foo.c bar.c \
-RUN:    | FileCheck --check-prefix=COMPILE-MULTI %s
-
-RUN: %build -n --verbose --arch=32 --mode=link --compiler=any -o %t/foo.exe foobar.obj \
-RUN:    | FileCheck --check-prefix=LINK %s
-
-RUN: %build -n --verbose --arch=32 --mode=link --compiler=any -o %t/foobar.exe foo.obj bar.obj \
-RUN:    | FileCheck --check-prefix=LINK-MULTI %s
-
-RUN: %build -n --verbose --arch=32 --mode=compile-and-link --compiler=any -o %t/foobar.exe foobar.c \
-RUN:    | FileCheck --check-prefix=BOTH %s
-
-RUN: %build -n --verbose --arch=32 --mode=compile-and-link --compiler=any -o %t/foobar.exe foo.c bar.c \
-RUN:    | FileCheck --check-prefix=BOTH-MULTI %s
-
-
-COMPILE: compiling foobar.c -> foo.out
-
-COMPILE-MULTI: compiling foo.c -> foo.o{{(bj)?}}
-COMPILE-MULTI: compiling bar.c -> bar.o{{(bj)?}}
-
-
-LINK: linking foobar.obj -> foo.exe
-
-LINK-MULTI: linking foo.obj+bar.obj -> foobar.exe
-
-BOTH: compiling foobar.c -> [[OBJFOO:foobar.exe-foobar.o(bj)?]]
-BOTH: linking [[OBJFOO]] -> foobar.exe
-
-BOTH-MULTI: compiling foo.c -> [[OBJFOO:foobar.exe-foo.o(bj)?]]
-BOTH-MULTI: compiling bar.c -> [[OBJBAR:foobar.exe-bar.o(bj)?]]
-BOTH-MULTI: linking [[OBJFOO]]+[[OBJBAR]] -> foobar.exe
+RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any -o %t/foo.out foobar.c \
+RUN:    | FileCheck --check-prefix=COMPILE %s
+
+RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any --outdir %t foo.c bar.c \
+RUN:    | FileCheck --check-prefix=COMPILE-MULTI %s
+
+RUN: %build -n --verbose --arch=32 --mode=link --compiler=any -o %t/foo.exe foobar.obj \
+RUN:    | FileCheck --check-prefix=LINK %s
+
+RUN: %build -n --verbose --arch=32 --mode=link --compiler=any -o %t/foobar.exe foo.obj bar.obj \
+RUN:    | FileCheck --check-prefix=LINK-MULTI %s
+
+RUN: %build -n --verbose --arch=32 --mode=compile-and-link --compiler=any -o %t/foobar.exe foobar.c \
+RUN:    | FileCheck --check-prefix=BOTH %s
+
+RUN: %build -n --verbose --arch=32 --mode=compile-and-link --compiler=any -o %t/foobar.exe foo.c bar.c \
+RUN:    | FileCheck --check-prefix=BOTH-MULTI %s
+
+
+COMPILE: compiling foobar.c -> foo.out
+
+COMPILE-MULTI: compiling foo.c -> foo.o{{(bj)?}}
+COMPILE-MULTI: compiling bar.c -> bar.o{{(bj)?}}
+
+
+LINK: linking foobar.obj -> foo.exe
+
+LINK-MULTI: linking foo.obj+bar.obj -> foobar.exe
+
+BOTH: compiling foobar.c -> [[OBJFOO:foobar.exe-foobar.o(bj)?]]
+BOTH: linking [[OBJFOO]] -> foobar.exe
+
+BOTH-MULTI: compiling foo.c -> [[OBJFOO:foobar.exe-foo.o(bj)?]]
+BOTH-MULTI: compiling bar.c -> [[OBJBAR:foobar.exe-bar.o(bj)?]]
+BOTH-MULTI: linking [[OBJFOO]]+[[OBJBAR]] -> foobar.exe
diff --git a/lldb/test/Shell/BuildScript/script-args.test b/lldb/test/Shell/BuildScript/script-args.test
index 647a48e4442b..13e8a5160942 100644
--- a/lldb/test/Shell/BuildScript/script-args.test
+++ b/lldb/test/Shell/BuildScript/script-args.test
@@ -1,32 +1,32 @@
-RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any -o %t/foo.out foobar.c \
-RUN:    | FileCheck %s
-RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any --outdir %t foo.c bar.c \
-RUN:    | FileCheck --check-prefix=MULTI-INPUT %s
-
-
-CHECK:      Script Arguments:
-CHECK-NEXT:   Arch: 32
-CHECK:        Compiler: any
-CHECK:        Outdir: {{.*}}script-args.test.tmp
-CHECK:        Output: {{.*}}script-args.test.tmp{{.}}foo.out
-CHECK:        Nodefaultlib: False
-CHECK:        Opt: none
-CHECK:        Mode: compile
-CHECK:        Clean: True
-CHECK:        Verbose: True
-CHECK:        Dryrun: True
-CHECK:        Inputs: foobar.c
-
-MULTI-INPUT:      Script Arguments:
-MULTI-INPUT-NEXT:   Arch: 32
-MULTI-INPUT-NEXT:   Compiler: any
-MULTI-INPUT-NEXT:   Outdir: {{.*}}script-args.test.tmp
-MULTI-INPUT-NEXT:   Output: 
-MULTI-INPUT-NEXT:   Nodefaultlib: False
-MULTI-INPUT-NEXT:   Opt: none
-MULTI-INPUT-NEXT:   Mode: compile
-MULTI-INPUT-NEXT:   Clean: True
-MULTI-INPUT-NEXT:   Verbose: True
-MULTI-INPUT-NEXT:   Dryrun: True
-MULTI-INPUT-NEXT:   Inputs: foo.c
-MULTI-INPUT-NEXT:           bar.c
+RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any -o %t/foo.out foobar.c \
+RUN:    | FileCheck %s
+RUN: %build -n --verbose --arch=32 --mode=compile --compiler=any --outdir %t foo.c bar.c \
+RUN:    | FileCheck --check-prefix=MULTI-INPUT %s
+
+
+CHECK:      Script Arguments:
+CHECK-NEXT:   Arch: 32
+CHECK:        Compiler: any
+CHECK:        Outdir: {{.*}}script-args.test.tmp
+CHECK:        Output: {{.*}}script-args.test.tmp{{.}}foo.out
+CHECK:        Nodefaultlib: False
+CHECK:        Opt: none
+CHECK:        Mode: compile
+CHECK:        Clean: True
+CHECK:        Verbose: True
+CHECK:        Dryrun: True
+CHECK:        Inputs: foobar.c
+
+MULTI-INPUT:      Script Arguments:
+MULTI-INPUT-NEXT:   Arch: 32
+MULTI-INPUT-NEXT:   Compiler: any
+MULTI-INPUT-NEXT:   Outdir: {{.*}}script-args.test.tmp
+MULTI-INPUT-NEXT:   Output: 
+MULTI-INPUT-NEXT:   Nodefaultlib: False
+MULTI-INPUT-NEXT:   Opt: none
+MULTI-INPUT-NEXT:   Mode: compile
+MULTI-INPUT-NEXT:   Clean: True
+MULTI-INPUT-NEXT:   Verbose: True
+MULTI-INPUT-NEXT:   Dryrun: True
+MULTI-INPUT-NEXT:   Inputs: foo.c
+MULTI-INPUT-NEXT:           bar.c
diff --git a/lldb/test/Shell/BuildScript/toolchain-clang-cl.test b/lldb/test/Shell/BuildScript/toolchain-clang-cl.test
index 4f64859a02b6..8c9ea9fddb8a 100644
--- a/lldb/test/Shell/BuildScript/toolchain-clang-cl.test
+++ b/lldb/test/Shell/BuildScript/toolchain-clang-cl.test
@@ -1,49 +1,49 @@
-REQUIRES: lld, system-windows
-
-RUN: %build -n --verbose --arch=32 --compiler=clang-cl --mode=compile-and-link -o %t/foo.exe foobar.c \
-RUN:    | FileCheck --check-prefix=CHECK-32 %s
-
-RUN: %build -n --verbose --arch=64 --compiler=clang-cl --mode=compile-and-link -o %t/foo.exe foobar.c \
-RUN:    | FileCheck --check-prefix=CHECK-64 %s
-
-CHECK-32: Script Arguments:
-CHECK-32:   Arch: 32
-CHECK-32:   Compiler: clang-cl
-CHECK-32:   Outdir: {{.*}}
-CHECK-32:   Output: {{.*}}toolchain-clang-cl.test.tmp\foo.exe
-CHECK-32:   Nodefaultlib: False
-CHECK-32:   Opt: none
-CHECK-32:   Mode: compile
-CHECK-32:   Clean: True
-CHECK-32:   Verbose: True
-CHECK-32:   Dryrun: True
-CHECK-32:   Inputs: foobar.c
-CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foobar.ilk
-CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe-foobar.obj
-CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.pdb
-CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe
-CHECK-32: compiling foobar.c -> foo.exe-foobar.obj
-CHECK-32: {{.*}}clang-cl{{(\.EXE)?}} -m32
-CHECK-32: linking foo.exe-foobar.obj -> foo.exe
-CHECK-32: {{.*}}lld-link{{(\.EXE)?}}
-
-CHECK-64: Script Arguments:
-CHECK-64:   Arch: 64
-CHECK-64:   Compiler: clang-cl
-CHECK-64:   Outdir: {{.*}}
-CHECK-64:   Output: {{.*}}toolchain-clang-cl.test.tmp\foo.exe
-CHECK-64:   Nodefaultlib: False
-CHECK-64:   Opt: none
-CHECK-64:   Mode: compile
-CHECK-64:   Clean: True
-CHECK-64:   Verbose: True
-CHECK-64:   Dryrun: True
-CHECK-64:   Inputs: foobar.c
-CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foobar.ilk
-CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe-foobar.obj
-CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.pdb
-CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe
-CHECK-64: compiling foobar.c -> foo.exe-foobar.obj
-CHECK-64: {{.*}}clang-cl{{(\.EXE)?}} -m64
-CHECK-64: linking foo.exe-foobar.obj -> foo.exe
-CHECK-64: {{.*}}lld-link{{(\.EXE)?}}
+REQUIRES: lld, system-windows
+
+RUN: %build -n --verbose --arch=32 --compiler=clang-cl --mode=compile-and-link -o %t/foo.exe foobar.c \
+RUN:    | FileCheck --check-prefix=CHECK-32 %s
+
+RUN: %build -n --verbose --arch=64 --compiler=clang-cl --mode=compile-and-link -o %t/foo.exe foobar.c \
+RUN:    | FileCheck --check-prefix=CHECK-64 %s
+
+CHECK-32: Script Arguments:
+CHECK-32:   Arch: 32
+CHECK-32:   Compiler: clang-cl
+CHECK-32:   Outdir: {{.*}}
+CHECK-32:   Output: {{.*}}toolchain-clang-cl.test.tmp\foo.exe
+CHECK-32:   Nodefaultlib: False
+CHECK-32:   Opt: none
+CHECK-32:   Mode: compile
+CHECK-32:   Clean: True
+CHECK-32:   Verbose: True
+CHECK-32:   Dryrun: True
+CHECK-32:   Inputs: foobar.c
+CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foobar.ilk
+CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe-foobar.obj
+CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.pdb
+CHECK-32: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe
+CHECK-32: compiling foobar.c -> foo.exe-foobar.obj
+CHECK-32: {{.*}}clang-cl{{(\.EXE)?}} -m32
+CHECK-32: linking foo.exe-foobar.obj -> foo.exe
+CHECK-32: {{.*}}lld-link{{(\.EXE)?}}
+
+CHECK-64: Script Arguments:
+CHECK-64:   Arch: 64
+CHECK-64:   Compiler: clang-cl
+CHECK-64:   Outdir: {{.*}}
+CHECK-64:   Output: {{.*}}toolchain-clang-cl.test.tmp\foo.exe
+CHECK-64:   Nodefaultlib: False
+CHECK-64:   Opt: none
+CHECK-64:   Mode: compile
+CHECK-64:   Clean: True
+CHECK-64:   Verbose: True
+CHECK-64:   Dryrun: True
+CHECK-64:   Inputs: foobar.c
+CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foobar.ilk
+CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe-foobar.obj
+CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.pdb
+CHECK-64: Cleaning {{.*}}toolchain-clang-cl.test.tmp{{.}}foo.exe
+CHECK-64: compiling foobar.c -> foo.exe-foobar.obj
+CHECK-64: {{.*}}clang-cl{{(\.EXE)?}} -m64
+CHECK-64: linking foo.exe-foobar.obj -> foo.exe
+CHECK-64: {{.*}}lld-link{{(\.EXE)?}}
diff --git a/lldb/test/Shell/Minidump/Windows/Sigsegv/Inputs/sigsegv.cpp b/lldb/test/Shell/Minidump/Windows/Sigsegv/Inputs/sigsegv.cpp
index d5b96472eb11..6bf78b5dc43b 100644
--- a/lldb/test/Shell/Minidump/Windows/Sigsegv/Inputs/sigsegv.cpp
+++ b/lldb/test/Shell/Minidump/Windows/Sigsegv/Inputs/sigsegv.cpp
@@ -1,40 +1,40 @@
-
-// nodefaultlib build: cl -Zi sigsegv.cpp /link /nodefaultlib
-
-#ifdef USE_CRT
-#include <stdio.h>
-#else
-int main();
-extern "C"
-{
-    int _fltused;
-    void mainCRTStartup() { main(); }
-    void printf(const char*, ...) {}
-}
-#endif
-
-void crash(bool crash_self)
-{
-    printf("Before...\n");
-    if(crash_self)
-    {
-        printf("Crashing in 3, 2, 1 ...\n");
-        *(volatile int*)nullptr = 0;
-    }
-    printf("After...\n");
-}
-
-int foo(int x, float y, const char* msg)
-{
-    bool flag = x > y;
-    if(flag)
-        printf("x = %d, y = %f, msg = %s\n", x, y, msg);
-    crash(flag);
-    return x << 1;
-}
-
-int main()
-{
-    foo(10, 3.14, "testing");
-}
-
+
+// nodefaultlib build: cl -Zi sigsegv.cpp /link /nodefaultlib
+
+#ifdef USE_CRT
+#include <stdio.h>
+#else
+int main();
+extern "C"
+{
+    int _fltused;
+    void mainCRTStartup() { main(); }
+    void printf(const char*, ...) {}
+}
+#endif
+
+void crash(bool crash_self)
+{
+    printf("Before...\n");
+    if(crash_self)
+    {
+        printf("Crashing in 3, 2, 1 ...\n");
+        *(volatile int*)nullptr = 0;
+    }
+    printf("After...\n");
+}
+
+int foo(int x, float y, const char* msg)
+{
+    bool flag = x > y;
+    if(flag)
+        printf("x = %d, y = %f, msg = %s\n", x, y, msg);
+    crash(flag);
+    return x << 1;
+}
+
+int main()
+{
+    foo(10, 3.14, "testing");
+}
+
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites.s b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites.s
index a9d248758bfc..aac8f4c16980 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites.s
+++ b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites.s
@@ -1,622 +1,622 @@
-# Compiled from the following files, but replaced the call to abort with nop.
-# clang-cl -fuse-ld=lld-link /Z7 /O1 /Faa.asm /winsysroot~/win_toolchain a.cpp
-# a.cpp:
-# #include "a.h"
-# int main(int argc, char** argv) {
-#   volatile int main_local = Namespace1::foo(2);
-#   return 0;
-# }
-# a.h:
-# #include <stdlib.h>
-# #include "b.h"
-# namespace Namespace1 {
-# inline int foo(int x) {
-#   volatile int foo_local = x + 1;
-#   ++foo_local;
-#   if (!foo_local)
-#     abort();
-#   return Class1::bar(foo_local);
-# }
-# } // namespace Namespace1
-# b.h:
-# #include "c.h"
-# class Class1 {
-# public:
-#   inline static int bar(int x) {
-#     volatile int bar_local = x + 1;
-#     ++bar_local;
-#     return Namespace2::Class2::func(bar_local);
-#   }
-# };
-# c.h:
-# namespace Namespace2 {
-# class Class2 {
-# public:
-#   inline static int func(int x) {
-#     volatile int func_local = x + 1;
-#     func_local += x;
-#     return func_local;
-#   }
-# };
-# } // namespace Namespace2
-
-	.text
-	.def	@feat.00;
-	.scl	3;
-	.type	0;
-	.endef
-	.globl	@feat.00
-.set @feat.00, 0
-	.intel_syntax noprefix
-	.file	"a.cpp"
-	.def	main;
-	.scl	2;
-	.type	32;
-	.endef
-	.section	.text,"xr",one_only,main
-	.globl	main                            # -- Begin function main
-main:                                   # @main
-.Lfunc_begin0:
-	.cv_func_id 0
-	.cv_file	1 "/tmp/a.cpp" "4FFB96E5DF1A95CE7DB9732CFFE001D7" 1
-	.cv_loc	0 1 2 0                         # a.cpp:2:0
-.seh_proc main
-# %bb.0:
-	#DEBUG_VALUE: main:argv <- $rdx
-	#DEBUG_VALUE: main:argc <- $ecx
-	#DEBUG_VALUE: foo:x <- 2
-	sub	rsp, 56
-	.seh_stackalloc 56
-	.seh_endprologue
-.Ltmp0:
-	.cv_file	2 "/tmp/./a.h" "BBFED90EF093E9C1D032CC9B05B5D167" 1
-	.cv_inline_site_id 1 within 0 inlined_at 1 3 0
-	.cv_loc	1 2 5 0                         # ./a.h:5:0
-	mov	dword ptr [rsp + 44], 3
-	.cv_loc	1 2 6 0                         # ./a.h:6:0
-	inc	dword ptr [rsp + 44]
-	.cv_loc	1 2 7 0                         # ./a.h:7:0
-	mov	eax, dword ptr [rsp + 44]
-	test	eax, eax
-	je	.LBB0_2
-.Ltmp1:
-# %bb.1:
-	#DEBUG_VALUE: main:argv <- $rdx
-	#DEBUG_VALUE: main:argc <- $ecx
-	#DEBUG_VALUE: foo:x <- 2
-	.cv_loc	1 2 9 0                         # ./a.h:9:0
-	mov	eax, dword ptr [rsp + 44]
-.Ltmp2:
-	#DEBUG_VALUE: bar:x <- $eax
-	.cv_file	3 "/tmp/./b.h" "A26CC743A260115F33AF91AB11F95877" 1
-	.cv_inline_site_id 2 within 1 inlined_at 2 9 0
-	.cv_loc	2 3 5 0                         # ./b.h:5:0
-	inc	eax
-.Ltmp3:
-	mov	dword ptr [rsp + 52], eax
-	.cv_loc	2 3 6 0                         # ./b.h:6:0
-	inc	dword ptr [rsp + 52]
-	.cv_loc	2 3 7 0                         # ./b.h:7:0
-	mov	eax, dword ptr [rsp + 52]
-.Ltmp4:
-	#DEBUG_VALUE: func:x <- $eax
-	.cv_file	4 "/tmp/./c.h" "8AF4613F78624BBE96D1C408ABA39B2D" 1
-	.cv_inline_site_id 3 within 2 inlined_at 3 7 0
-	.cv_loc	3 4 5 0                         # ./c.h:5:0
-	lea	ecx, [rax + 1]
-.Ltmp5:
-	#DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $ecx
-	mov	dword ptr [rsp + 48], ecx
-	.cv_loc	3 4 6 0                         # ./c.h:6:0
-	add	dword ptr [rsp + 48], eax
-	.cv_loc	3 4 7 0                         # ./c.h:7:0
-	mov	eax, dword ptr [rsp + 48]
-.Ltmp6:
-	.cv_loc	0 1 3 0                         # a.cpp:3:0
-	mov	dword ptr [rsp + 48], eax
-	.cv_loc	0 1 4 0                         # a.cpp:4:0
-	xor	eax, eax
-	# Use fake debug info to tests inline info.
-	.cv_loc	1 2 20 0
-	add	rsp, 56
-	ret
-.Ltmp7:
-.LBB0_2:
-	#DEBUG_VALUE: main:argv <- $rdx
-	#DEBUG_VALUE: main:argc <- $ecx
-	#DEBUG_VALUE: foo:x <- 2
-	.cv_loc	1 2 8 0                         # ./a.h:8:0
-	nop
-.Ltmp8:
-	int3
-.Ltmp9:
-	#DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $ecx
-	#DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rdx
-.Lfunc_end0:
-	.seh_endproc
-                                        # -- End function
-	.section	.drectve,"yn"
-	.ascii	" /DEFAULTLIB:libcmt.lib"
-	.ascii	" /DEFAULTLIB:oldnames.lib"
-	.section	.debug$S,"dr"
-	.p2align	2
-	.long	4                               # Debug section magic
-	.long	241
-	.long	.Ltmp11-.Ltmp10                 # Subsection size
-.Ltmp10:
-	.short	.Ltmp13-.Ltmp12                 # Record length
-.Ltmp12:
-	.short	4353                            # Record kind: S_OBJNAME
-	.long	0                               # Signature
-	.asciz	"/tmp/a-2b2ba0.obj"             # Object name
-	.p2align	2
-.Ltmp13:
-	.short	.Ltmp15-.Ltmp14                 # Record length
-.Ltmp14:
-	.short	4412                            # Record kind: S_COMPILE3
-	.long	1                               # Flags and language
-	.short	208                             # CPUType
-	.short	15                              # Frontend version
-	.short	0
-	.short	0
-	.short	0
-	.short	15000                           # Backend version
-	.short	0
-	.short	0
-	.short	0
-	.asciz	"clang version 15.0.0"          # Null-terminated compiler version string
-	.p2align	2
-.Ltmp15:
-.Ltmp11:
-	.p2align	2
-	.long	246                             # Inlinee lines subsection
-	.long	.Ltmp17-.Ltmp16                 # Subsection size
-.Ltmp16:
-	.long	0                               # Inlinee lines signature
-
-                                        # Inlined function foo starts at ./a.h:4
-	.long	4099                            # Type index of inlined function
-	.cv_filechecksumoffset	2               # Offset into filechecksum table
-	.long	4                               # Starting line number
-
-                                        # Inlined function bar starts at ./b.h:4
-	.long	4106                            # Type index of inlined function
-	.cv_filechecksumoffset	3               # Offset into filechecksum table
-	.long	4                               # Starting line number
-
-                                        # Inlined function func starts at ./c.h:4
-	.long	4113                            # Type index of inlined function
-	.cv_filechecksumoffset	4               # Offset into filechecksum table
-	.long	4                               # Starting line number
-.Ltmp17:
-	.p2align	2
-	.section	.debug$S,"dr",associative,main
-	.p2align	2
-	.long	4                               # Debug section magic
-	.long	241                             # Symbol subsection for main
-	.long	.Ltmp19-.Ltmp18                 # Subsection size
-.Ltmp18:
-	.short	.Ltmp21-.Ltmp20                 # Record length
-.Ltmp20:
-	.short	4423                            # Record kind: S_GPROC32_ID
-	.long	0                               # PtrParent
-	.long	0                               # PtrEnd
-	.long	0                               # PtrNext
-	.long	.Lfunc_end0-main                # Code size
-	.long	0                               # Offset after prologue
-	.long	0                               # Offset before epilogue
-	.long	4117                            # Function type index
-	.secrel32	main                    # Function section relative address
-	.secidx	main                            # Function section index
-	.byte	0                               # Flags
-	.asciz	"main"                          # Function name
-	.p2align	2
-.Ltmp21:
-	.short	.Ltmp23-.Ltmp22                 # Record length
-.Ltmp22:
-	.short	4114                            # Record kind: S_FRAMEPROC
-	.long	56                              # FrameSize
-	.long	0                               # Padding
-	.long	0                               # Offset of padding
-	.long	0                               # Bytes of callee saved registers
-	.long	0                               # Exception handler offset
-	.short	0                               # Exception handler section
-	.long	81920                           # Flags (defines frame register)
-	.p2align	2
-.Ltmp23:
-	.short	.Ltmp25-.Ltmp24                 # Record length
-.Ltmp24:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	116                             # TypeIndex
-	.short	1                               # Flags
-	.asciz	"argc"
-	.p2align	2
-.Ltmp25:
-	.cv_def_range	 .Lfunc_begin0 .Ltmp5 .Ltmp7 .Ltmp8, reg, 18
-	.short	.Ltmp27-.Ltmp26                 # Record length
-.Ltmp26:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	4114                            # TypeIndex
-	.short	1                               # Flags
-	.asciz	"argv"
-	.p2align	2
-.Ltmp27:
-	.cv_def_range	 .Lfunc_begin0 .Ltmp8, reg, 331
-	.short	.Ltmp29-.Ltmp28                 # Record length
-.Ltmp28:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	4118                            # TypeIndex
-	.short	0                               # Flags
-	.asciz	"main_local"
-	.p2align	2
-.Ltmp29:
-	.cv_def_range	 .Ltmp0 .Ltmp9, frame_ptr_rel, 48
-	.short	.Ltmp31-.Ltmp30                 # Record length
-.Ltmp30:
-	.short	4429                            # Record kind: S_INLINESITE
-	.long	0                               # PtrParent
-	.long	0                               # PtrEnd
-	.long	4099                            # Inlinee type index
-	.cv_inline_linetable	1 2 4 .Lfunc_begin0 .Lfunc_end0
-	.p2align	2
-.Ltmp31:
-	.short	.Ltmp33-.Ltmp32                 # Record length
-.Ltmp32:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	116                             # TypeIndex
-	.short	257                             # Flags
-	.asciz	"x"
-	.p2align	2
-.Ltmp33:
-	.short	.Ltmp35-.Ltmp34                 # Record length
-.Ltmp34:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	4118                            # TypeIndex
-	.short	0                               # Flags
-	.asciz	"foo_local"
-	.p2align	2
-.Ltmp35:
-	.cv_def_range	 .Ltmp0 .Ltmp6 .Ltmp7 .Ltmp9, frame_ptr_rel, 44
-	.short	.Ltmp37-.Ltmp36                 # Record length
-.Ltmp36:
-	.short	4429                            # Record kind: S_INLINESITE
-	.long	0                               # PtrParent
-	.long	0                               # PtrEnd
-	.long	4106                            # Inlinee type index
-	.cv_inline_linetable	2 3 4 .Lfunc_begin0 .Lfunc_end0
-	.p2align	2
-.Ltmp37:
-	.short	.Ltmp39-.Ltmp38                 # Record length
-.Ltmp38:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	116                             # TypeIndex
-	.short	1                               # Flags
-	.asciz	"x"
-	.p2align	2
-.Ltmp39:
-	.cv_def_range	 .Ltmp2 .Ltmp3, reg, 17
-	.short	.Ltmp41-.Ltmp40                 # Record length
-.Ltmp40:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	4118                            # TypeIndex
-	.short	0                               # Flags
-	.asciz	"bar_local"
-	.p2align	2
-.Ltmp41:
-	.cv_def_range	 .Ltmp2 .Ltmp6, frame_ptr_rel, 52
-	.short	.Ltmp43-.Ltmp42                 # Record length
-.Ltmp42:
-	.short	4429                            # Record kind: S_INLINESITE
-	.long	0                               # PtrParent
-	.long	0                               # PtrEnd
-	.long	4113                            # Inlinee type index
-	.cv_inline_linetable	3 4 4 .Lfunc_begin0 .Lfunc_end0
-	.p2align	2
-.Ltmp43:
-	.short	.Ltmp45-.Ltmp44                 # Record length
-.Ltmp44:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	116                             # TypeIndex
-	.short	1                               # Flags
-	.asciz	"x"
-	.p2align	2
-.Ltmp45:
-	.cv_def_range	 .Ltmp4 .Ltmp6, reg, 17
-	.short	.Ltmp47-.Ltmp46                 # Record length
-.Ltmp46:
-	.short	4414                            # Record kind: S_LOCAL
-	.long	4118                            # TypeIndex
-	.short	0                               # Flags
-	.asciz	"func_local"
-	.p2align	2
-.Ltmp47:
-	.cv_def_range	 .Ltmp4 .Ltmp6, frame_ptr_rel, 48
-	.short	2                               # Record length
-	.short	4430                            # Record kind: S_INLINESITE_END
-	.short	2                               # Record length
-	.short	4430                            # Record kind: S_INLINESITE_END
-	.short	2                               # Record length
-	.short	4430                            # Record kind: S_INLINESITE_END
-	.short	2                               # Record length
-	.short	4431                            # Record kind: S_PROC_ID_END
-.Ltmp19:
-	.p2align	2
-	.cv_linetable	0, main, .Lfunc_end0
-	.section	.debug$S,"dr"
-	.long	241
-	.long	.Ltmp49-.Ltmp48                 # Subsection size
-.Ltmp48:
-	.short	.Ltmp51-.Ltmp50                 # Record length
-.Ltmp50:
-	.short	4360                            # Record kind: S_UDT
-	.long	4103                            # Type
-	.asciz	"Class1"
-	.p2align	2
-.Ltmp51:
-	.short	.Ltmp53-.Ltmp52                 # Record length
-.Ltmp52:
-	.short	4360                            # Record kind: S_UDT
-	.long	4110                            # Type
-	.asciz	"Namespace2::Class2"
-	.p2align	2
-.Ltmp53:
-.Ltmp49:
-	.p2align	2
-	.cv_filechecksums                       # File index to string table offset subsection
-	.cv_stringtable                         # String table
-	.long	241
-	.long	.Ltmp55-.Ltmp54                 # Subsection size
-.Ltmp54:
-	.short	.Ltmp57-.Ltmp56                 # Record length
-.Ltmp56:
-	.short	4428                            # Record kind: S_BUILDINFO
-	.long	4124                            # LF_BUILDINFO index
-	.p2align	2
-.Ltmp57:
-.Ltmp55:
-	.p2align	2
-	.section	.debug$T,"dr"
-	.p2align	2
-	.long	4                               # Debug section magic
-	# StringId (0x1000)
-	.short	0x12                            # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"Namespace1"                    # StringData
-	.byte	241
-	# ArgList (0x1001)
-	.short	0xa                             # Record length
-	.short	0x1201                          # Record kind: LF_ARGLIST
-	.long	0x1                             # NumArgs
-	.long	0x74                            # Argument: int
-	# Procedure (0x1002)
-	.short	0xe                             # Record length
-	.short	0x1008                          # Record kind: LF_PROCEDURE
-	.long	0x74                            # ReturnType: int
-	.byte	0x0                             # CallingConvention: NearC
-	.byte	0x0                             # FunctionOptions
-	.short	0x1                             # NumParameters
-	.long	0x1001                          # ArgListType: (int)
-	# FuncId (0x1003)
-	.short	0xe                             # Record length
-	.short	0x1601                          # Record kind: LF_FUNC_ID
-	.long	0x1000                          # ParentScope: Namespace1
-	.long	0x1002                          # FunctionType: int (int)
-	.asciz	"foo"                           # Name
-	# Class (0x1004)
-	.short	0x2a                            # Record length
-	.short	0x1504                          # Record kind: LF_CLASS
-	.short	0x0                             # MemberCount
-	.short	0x280                           # Properties ( ForwardReference (0x80) | HasUniqueName (0x200) )
-	.long	0x0                             # FieldList
-	.long	0x0                             # DerivedFrom
-	.long	0x0                             # VShape
-	.short	0x0                             # SizeOf
-	.asciz	"Class1"                        # Name
-	.asciz	".?AVClass1@@"                  # LinkageName
-	.byte	242
-	.byte	241
-	# MemberFunction (0x1005)
-	.short	0x1a                            # Record length
-	.short	0x1009                          # Record kind: LF_MFUNCTION
-	.long	0x74                            # ReturnType: int
-	.long	0x1004                          # ClassType: Class1
-	.long	0x0                             # ThisType
-	.byte	0x0                             # CallingConvention: NearC
-	.byte	0x0                             # FunctionOptions
-	.short	0x1                             # NumParameters
-	.long	0x1001                          # ArgListType: (int)
-	.long	0x0                             # ThisAdjustment
-	# FieldList (0x1006)
-	.short	0xe                             # Record length
-	.short	0x1203                          # Record kind: LF_FIELDLIST
-	.short	0x1511                          # Member kind: OneMethod ( LF_ONEMETHOD )
-	.short	0xb                             # Attrs: Public, Static
-	.long	0x1005                          # Type: int Class1::(int)
-	.asciz	"bar"                           # Name
-	# Class (0x1007)
-	.short	0x2a                            # Record length
-	.short	0x1504                          # Record kind: LF_CLASS
-	.short	0x1                             # MemberCount
-	.short	0x200                           # Properties ( HasUniqueName (0x200) )
-	.long	0x1006                          # FieldList: <field list>
-	.long	0x0                             # DerivedFrom
-	.long	0x0                             # VShape
-	.short	0x1                             # SizeOf
-	.asciz	"Class1"                        # Name
-	.asciz	".?AVClass1@@"                  # LinkageName
-	.byte	242
-	.byte	241
-	# StringId (0x1008)
-	.short	0x12                            # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"/tmp/./b.h"                    # StringData
-	.byte	241
-	# UdtSourceLine (0x1009)
-	.short	0xe                             # Record length
-	.short	0x1606                          # Record kind: LF_UDT_SRC_LINE
-	.long	0x1007                          # UDT: Class1
-	.long	0x1008                          # SourceFile: /tmp/./b.h
-	.long	0x2                             # LineNumber
-	# MemberFuncId (0x100A)
-	.short	0xe                             # Record length
-	.short	0x1602                          # Record kind: LF_MFUNC_ID
-	.long	0x1004                          # ClassType: Class1
-	.long	0x1005                          # FunctionType: int Class1::(int)
-	.asciz	"bar"                           # Name
-	# Class (0x100B)
-	.short	0x42                            # Record length
-	.short	0x1504                          # Record kind: LF_CLASS
-	.short	0x0                             # MemberCount
-	.short	0x280                           # Properties ( ForwardReference (0x80) | HasUniqueName (0x200) )
-	.long	0x0                             # FieldList
-	.long	0x0                             # DerivedFrom
-	.long	0x0                             # VShape
-	.short	0x0                             # SizeOf
-	.asciz	"Namespace2::Class2"            # Name
-	.asciz	".?AVClass2@Namespace2@@"       # LinkageName
-	.byte	243
-	.byte	242
-	.byte	241
-	# MemberFunction (0x100C)
-	.short	0x1a                            # Record length
-	.short	0x1009                          # Record kind: LF_MFUNCTION
-	.long	0x74                            # ReturnType: int
-	.long	0x100b                          # ClassType: Namespace2::Class2
-	.long	0x0                             # ThisType
-	.byte	0x0                             # CallingConvention: NearC
-	.byte	0x0                             # FunctionOptions
-	.short	0x1                             # NumParameters
-	.long	0x1001                          # ArgListType: (int)
-	.long	0x0                             # ThisAdjustment
-	# FieldList (0x100D)
-	.short	0x12                            # Record length
-	.short	0x1203                          # Record kind: LF_FIELDLIST
-	.short	0x1511                          # Member kind: OneMethod ( LF_ONEMETHOD )
-	.short	0xb                             # Attrs: Public, Static
-	.long	0x100c                          # Type: int Namespace2::Class2::(int)
-	.asciz	"func"                          # Name
-	.byte	243
-	.byte	242
-	.byte	241
-	# Class (0x100E)
-	.short	0x42                            # Record length
-	.short	0x1504                          # Record kind: LF_CLASS
-	.short	0x1                             # MemberCount
-	.short	0x200                           # Properties ( HasUniqueName (0x200) )
-	.long	0x100d                          # FieldList: <field list>
-	.long	0x0                             # DerivedFrom
-	.long	0x0                             # VShape
-	.short	0x1                             # SizeOf
-	.asciz	"Namespace2::Class2"            # Name
-	.asciz	".?AVClass2@Namespace2@@"       # LinkageName
-	.byte	243
-	.byte	242
-	.byte	241
-	# StringId (0x100F)
-	.short	0x12                            # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"/tmp/./c.h"                    # StringData
-	.byte	241
-	# UdtSourceLine (0x1010)
-	.short	0xe                             # Record length
-	.short	0x1606                          # Record kind: LF_UDT_SRC_LINE
-	.long	0x100e                          # UDT: Namespace2::Class2
-	.long	0x100f                          # SourceFile: /tmp/./c.h
-	.long	0x2                             # LineNumber
-	# MemberFuncId (0x1011)
-	.short	0x12                            # Record length
-	.short	0x1602                          # Record kind: LF_MFUNC_ID
-	.long	0x100b                          # ClassType: Namespace2::Class2
-	.long	0x100c                          # FunctionType: int Namespace2::Class2::(int)
-	.asciz	"func"                          # Name
-	.byte	243
-	.byte	242
-	.byte	241
-	# Pointer (0x1012)
-	.short	0xa                             # Record length
-	.short	0x1002                          # Record kind: LF_POINTER
-	.long	0x670                           # PointeeType: char*
-	.long	0x1000c                         # Attrs: [ Type: Near64, Mode: Pointer, SizeOf: 8 ]
-	# ArgList (0x1013)
-	.short	0xe                             # Record length
-	.short	0x1201                          # Record kind: LF_ARGLIST
-	.long	0x2                             # NumArgs
-	.long	0x74                            # Argument: int
-	.long	0x1012                          # Argument: char**
-	# Procedure (0x1014)
-	.short	0xe                             # Record length
-	.short	0x1008                          # Record kind: LF_PROCEDURE
-	.long	0x74                            # ReturnType: int
-	.byte	0x0                             # CallingConvention: NearC
-	.byte	0x0                             # FunctionOptions
-	.short	0x2                             # NumParameters
-	.long	0x1013                          # ArgListType: (int, char**)
-	# FuncId (0x1015)
-	.short	0x12                            # Record length
-	.short	0x1601                          # Record kind: LF_FUNC_ID
-	.long	0x0                             # ParentScope
-	.long	0x1014                          # FunctionType: int (int, char**)
-	.asciz	"main"                          # Name
-	.byte	243
-	.byte	242
-	.byte	241
-	# Modifier (0x1016)
-	.short	0xa                             # Record length
-	.short	0x1001                          # Record kind: LF_MODIFIER
-	.long	0x74                            # ModifiedType: int
-	.short	0x2                             # Modifiers ( Volatile (0x2) )
-	.byte	242
-	.byte	241
-	# StringId (0x1017)
-	.short	0xe                             # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"/tmp"                          # StringData
-	.byte	243
-	.byte	242
-	.byte	241
-	# StringId (0x1018)
-	.short	0xe                             # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"a.cpp"                         # StringData
-	.byte	242
-	.byte	241
-	# StringId (0x1019)
-	.short	0xa                             # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.byte	0                               # StringData
-	.byte	243
-	.byte	242
-	.byte	241
-	# StringId (0x101A)
-	.short	0x4e                            # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"/usr/local/google/home/zequanwu/llvm-project/build/release/bin/clang" # StringData
-	.byte	243
-	.byte	242
-	.byte	241
-	# StringId (0x101B)
-	.short	0x9f6                           # Record length
-	.short	0x1605                          # Record kind: LF_STRING_ID
-	.long	0x0                             # Id
-	.asciz	"\"-cc1\" \"-triple\" \"x86_64-pc-windows-msvc19.20.0\" \"-S\" \"-disable-free\" \"-clear-ast-before-backend\" \"-disable-llvm-verifier\" \"-discard-value-names\" \"-mrelocation-model\" \"pic\" \"-pic-level\" \"2\" \"-mframe-pointer=none\" \"-relaxed-aliasing\" \"-fmath-errno\" \"-ffp-contract=on\" \"-fno-rounding-math\" \"-mconstructor-aliases\" \"-funwind-tables=2\" \"-target-cpu\" \"x86-64\" \"-mllvm\" \"-x86-asm-syntax=intel\" \"-tune-cpu\" \"generic\" \"-mllvm\" \"-treat-scalable-fixed-error-as-warning\" \"-D_MT\" \"-flto-visibility-public-std\" \"--dependent-lib=libcmt\" \"--dependent-lib=oldnames\" \"-stack-protector\" \"2\" \"-fms-volatile\" \"-fdiagnostics-format\" \"msvc\" \"-gno-column-info\" \"-gcodeview\" \"-debug-info-kind=constructor\" \"-ffunction-sections\" \"-fcoverage-compilation-dir=/tmp\" \"-resource-dir\" \"/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/DIA SDK/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/atlmfc/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/ucrt\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/shared\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/um\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/winrt\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/cppwinrt\" \"-Os\" \"-fdeprecated-macro\" \"-fdebug-compilation-dir=/tmp\" \"-ferror-limit\" \"19\" \"-fno-use-cxa-atexit\" \"-fms-extensions\" \"-fms-compatibility\" \"-fms-compatibility-version=19.20\" \"-std=c++14\" \"-fdelayed-template-parsing\" \"-fcolor-diagnostics\" \"-vectorize-loops\" \"-vectorize-slp\" \"-faddrsig\" \"-x\" \"c++\"" # StringData
-	.byte	242
-	.byte	241
-	# BuildInfo (0x101C)
-	.short	0x1a                            # Record length
-	.short	0x1603                          # Record kind: LF_BUILDINFO
-	.short	0x5                             # NumArgs
-	.long	0x1017                          # Argument: /tmp
-	.long	0x101a                          # Argument: /usr/local/google/home/zequanwu/llvm-project/build/release/bin/clang
-	.long	0x1018                          # Argument: a.cpp
-	.long	0x1019                          # Argument
-	.long	0x101b                          # Argument: "-cc1" "-triple" "x86_64-pc-windows-msvc19.20.0" "-S" "-disable-free" "-clear-ast-before-backend" "-disable-llvm-verifier" "-discard-value-names" "-mrelocation-model" "pic" "-pic-level" "2" "-mframe-pointer=none" "-relaxed-aliasing" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-mllvm" "-x86-asm-syntax=intel" "-tune-cpu" "generic" "-mllvm" "-treat-scalable-fixed-error-as-warning" "-D_MT" "-flto-visibility-public-std" "--dependent-lib=libcmt" "--dependent-lib=oldnames" "-stack-protector" "2" "-fms-volatile" "-fdiagnostics-format" "msvc" "-gno-column-info" "-gcodeview" "-debug-info-kind=constructor" "-ffunction-sections" "-fcoverage-compilation-dir=/tmp" "-resource-dir" "/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0" "-internal-isystem" "/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/DIA SDK/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/atlmfc/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/ucrt" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/shared" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/um" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/winrt" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/cppwinrt" "-Os" "-fdeprecated-macro" "-fdebug-compilation-dir=/tmp" "-ferror-limit" "19" "-fno-use-cxa-atexit" "-fms-extensions" "-fms-compatibility" "-fms-compatibility-version=19.20" "-std=c++14" "-fdelayed-template-parsing" "-fcolor-diagnostics" "-vectorize-loops" "-vectorize-slp" "-faddrsig" "-x" "c++"
-	.byte	242
-	.byte	241
-	.addrsig
+# Compiled from the following files, but replaced the call to abort with nop.
+# clang-cl -fuse-ld=lld-link /Z7 /O1 /Faa.asm /winsysroot~/win_toolchain a.cpp
+# a.cpp:
+# #include "a.h"
+# int main(int argc, char** argv) {
+#   volatile int main_local = Namespace1::foo(2);
+#   return 0;
+# }
+# a.h:
+# #include <stdlib.h>
+# #include "b.h"
+# namespace Namespace1 {
+# inline int foo(int x) {
+#   volatile int foo_local = x + 1;
+#   ++foo_local;
+#   if (!foo_local)
+#     abort();
+#   return Class1::bar(foo_local);
+# }
+# } // namespace Namespace1
+# b.h:
+# #include "c.h"
+# class Class1 {
+# public:
+#   inline static int bar(int x) {
+#     volatile int bar_local = x + 1;
+#     ++bar_local;
+#     return Namespace2::Class2::func(bar_local);
+#   }
+# };
+# c.h:
+# namespace Namespace2 {
+# class Class2 {
+# public:
+#   inline static int func(int x) {
+#     volatile int func_local = x + 1;
+#     func_local += x;
+#     return func_local;
+#   }
+# };
+# } // namespace Namespace2
+
+	.text
+	.def	@feat.00;
+	.scl	3;
+	.type	0;
+	.endef
+	.globl	@feat.00
+.set @feat.00, 0
+	.intel_syntax noprefix
+	.file	"a.cpp"
+	.def	main;
+	.scl	2;
+	.type	32;
+	.endef
+	.section	.text,"xr",one_only,main
+	.globl	main                            # -- Begin function main
+main:                                   # @main
+.Lfunc_begin0:
+	.cv_func_id 0
+	.cv_file	1 "/tmp/a.cpp" "4FFB96E5DF1A95CE7DB9732CFFE001D7" 1
+	.cv_loc	0 1 2 0                         # a.cpp:2:0
+.seh_proc main
+# %bb.0:
+	#DEBUG_VALUE: main:argv <- $rdx
+	#DEBUG_VALUE: main:argc <- $ecx
+	#DEBUG_VALUE: foo:x <- 2
+	sub	rsp, 56
+	.seh_stackalloc 56
+	.seh_endprologue
+.Ltmp0:
+	.cv_file	2 "/tmp/./a.h" "BBFED90EF093E9C1D032CC9B05B5D167" 1
+	.cv_inline_site_id 1 within 0 inlined_at 1 3 0
+	.cv_loc	1 2 5 0                         # ./a.h:5:0
+	mov	dword ptr [rsp + 44], 3
+	.cv_loc	1 2 6 0                         # ./a.h:6:0
+	inc	dword ptr [rsp + 44]
+	.cv_loc	1 2 7 0                         # ./a.h:7:0
+	mov	eax, dword ptr [rsp + 44]
+	test	eax, eax
+	je	.LBB0_2
+.Ltmp1:
+# %bb.1:
+	#DEBUG_VALUE: main:argv <- $rdx
+	#DEBUG_VALUE: main:argc <- $ecx
+	#DEBUG_VALUE: foo:x <- 2
+	.cv_loc	1 2 9 0                         # ./a.h:9:0
+	mov	eax, dword ptr [rsp + 44]
+.Ltmp2:
+	#DEBUG_VALUE: bar:x <- $eax
+	.cv_file	3 "/tmp/./b.h" "A26CC743A260115F33AF91AB11F95877" 1
+	.cv_inline_site_id 2 within 1 inlined_at 2 9 0
+	.cv_loc	2 3 5 0                         # ./b.h:5:0
+	inc	eax
+.Ltmp3:
+	mov	dword ptr [rsp + 52], eax
+	.cv_loc	2 3 6 0                         # ./b.h:6:0
+	inc	dword ptr [rsp + 52]
+	.cv_loc	2 3 7 0                         # ./b.h:7:0
+	mov	eax, dword ptr [rsp + 52]
+.Ltmp4:
+	#DEBUG_VALUE: func:x <- $eax
+	.cv_file	4 "/tmp/./c.h" "8AF4613F78624BBE96D1C408ABA39B2D" 1
+	.cv_inline_site_id 3 within 2 inlined_at 3 7 0
+	.cv_loc	3 4 5 0                         # ./c.h:5:0
+	lea	ecx, [rax + 1]
+.Ltmp5:
+	#DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $ecx
+	mov	dword ptr [rsp + 48], ecx
+	.cv_loc	3 4 6 0                         # ./c.h:6:0
+	add	dword ptr [rsp + 48], eax
+	.cv_loc	3 4 7 0                         # ./c.h:7:0
+	mov	eax, dword ptr [rsp + 48]
+.Ltmp6:
+	.cv_loc	0 1 3 0                         # a.cpp:3:0
+	mov	dword ptr [rsp + 48], eax
+	.cv_loc	0 1 4 0                         # a.cpp:4:0
+	xor	eax, eax
+	# Use fake debug info to tests inline info.
+	.cv_loc	1 2 20 0
+	add	rsp, 56
+	ret
+.Ltmp7:
+.LBB0_2:
+	#DEBUG_VALUE: main:argv <- $rdx
+	#DEBUG_VALUE: main:argc <- $ecx
+	#DEBUG_VALUE: foo:x <- 2
+	.cv_loc	1 2 8 0                         # ./a.h:8:0
+	nop
+.Ltmp8:
+	int3
+.Ltmp9:
+	#DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $ecx
+	#DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rdx
+.Lfunc_end0:
+	.seh_endproc
+                                        # -- End function
+	.section	.drectve,"yn"
+	.ascii	" /DEFAULTLIB:libcmt.lib"
+	.ascii	" /DEFAULTLIB:oldnames.lib"
+	.section	.debug$S,"dr"
+	.p2align	2
+	.long	4                               # Debug section magic
+	.long	241
+	.long	.Ltmp11-.Ltmp10                 # Subsection size
+.Ltmp10:
+	.short	.Ltmp13-.Ltmp12                 # Record length
+.Ltmp12:
+	.short	4353                            # Record kind: S_OBJNAME
+	.long	0                               # Signature
+	.asciz	"/tmp/a-2b2ba0.obj"             # Object name
+	.p2align	2
+.Ltmp13:
+	.short	.Ltmp15-.Ltmp14                 # Record length
+.Ltmp14:
+	.short	4412                            # Record kind: S_COMPILE3
+	.long	1                               # Flags and language
+	.short	208                             # CPUType
+	.short	15                              # Frontend version
+	.short	0
+	.short	0
+	.short	0
+	.short	15000                           # Backend version
+	.short	0
+	.short	0
+	.short	0
+	.asciz	"clang version 15.0.0"          # Null-terminated compiler version string
+	.p2align	2
+.Ltmp15:
+.Ltmp11:
+	.p2align	2
+	.long	246                             # Inlinee lines subsection
+	.long	.Ltmp17-.Ltmp16                 # Subsection size
+.Ltmp16:
+	.long	0                               # Inlinee lines signature
+
+                                        # Inlined function foo starts at ./a.h:4
+	.long	4099                            # Type index of inlined function
+	.cv_filechecksumoffset	2               # Offset into filechecksum table
+	.long	4                               # Starting line number
+
+                                        # Inlined function bar starts at ./b.h:4
+	.long	4106                            # Type index of inlined function
+	.cv_filechecksumoffset	3               # Offset into filechecksum table
+	.long	4                               # Starting line number
+
+                                        # Inlined function func starts at ./c.h:4
+	.long	4113                            # Type index of inlined function
+	.cv_filechecksumoffset	4               # Offset into filechecksum table
+	.long	4                               # Starting line number
+.Ltmp17:
+	.p2align	2
+	.section	.debug$S,"dr",associative,main
+	.p2align	2
+	.long	4                               # Debug section magic
+	.long	241                             # Symbol subsection for main
+	.long	.Ltmp19-.Ltmp18                 # Subsection size
+.Ltmp18:
+	.short	.Ltmp21-.Ltmp20                 # Record length
+.Ltmp20:
+	.short	4423                            # Record kind: S_GPROC32_ID
+	.long	0                               # PtrParent
+	.long	0                               # PtrEnd
+	.long	0                               # PtrNext
+	.long	.Lfunc_end0-main                # Code size
+	.long	0                               # Offset after prologue
+	.long	0                               # Offset before epilogue
+	.long	4117                            # Function type index
+	.secrel32	main                    # Function section relative address
+	.secidx	main                            # Function section index
+	.byte	0                               # Flags
+	.asciz	"main"                          # Function name
+	.p2align	2
+.Ltmp21:
+	.short	.Ltmp23-.Ltmp22                 # Record length
+.Ltmp22:
+	.short	4114                            # Record kind: S_FRAMEPROC
+	.long	56                              # FrameSize
+	.long	0                               # Padding
+	.long	0                               # Offset of padding
+	.long	0                               # Bytes of callee saved registers
+	.long	0                               # Exception handler offset
+	.short	0                               # Exception handler section
+	.long	81920                           # Flags (defines frame register)
+	.p2align	2
+.Ltmp23:
+	.short	.Ltmp25-.Ltmp24                 # Record length
+.Ltmp24:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	116                             # TypeIndex
+	.short	1                               # Flags
+	.asciz	"argc"
+	.p2align	2
+.Ltmp25:
+	.cv_def_range	 .Lfunc_begin0 .Ltmp5 .Ltmp7 .Ltmp8, reg, 18
+	.short	.Ltmp27-.Ltmp26                 # Record length
+.Ltmp26:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	4114                            # TypeIndex
+	.short	1                               # Flags
+	.asciz	"argv"
+	.p2align	2
+.Ltmp27:
+	.cv_def_range	 .Lfunc_begin0 .Ltmp8, reg, 331
+	.short	.Ltmp29-.Ltmp28                 # Record length
+.Ltmp28:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	4118                            # TypeIndex
+	.short	0                               # Flags
+	.asciz	"main_local"
+	.p2align	2
+.Ltmp29:
+	.cv_def_range	 .Ltmp0 .Ltmp9, frame_ptr_rel, 48
+	.short	.Ltmp31-.Ltmp30                 # Record length
+.Ltmp30:
+	.short	4429                            # Record kind: S_INLINESITE
+	.long	0                               # PtrParent
+	.long	0                               # PtrEnd
+	.long	4099                            # Inlinee type index
+	.cv_inline_linetable	1 2 4 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp31:
+	.short	.Ltmp33-.Ltmp32                 # Record length
+.Ltmp32:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	116                             # TypeIndex
+	.short	257                             # Flags
+	.asciz	"x"
+	.p2align	2
+.Ltmp33:
+	.short	.Ltmp35-.Ltmp34                 # Record length
+.Ltmp34:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	4118                            # TypeIndex
+	.short	0                               # Flags
+	.asciz	"foo_local"
+	.p2align	2
+.Ltmp35:
+	.cv_def_range	 .Ltmp0 .Ltmp6 .Ltmp7 .Ltmp9, frame_ptr_rel, 44
+	.short	.Ltmp37-.Ltmp36                 # Record length
+.Ltmp36:
+	.short	4429                            # Record kind: S_INLINESITE
+	.long	0                               # PtrParent
+	.long	0                               # PtrEnd
+	.long	4106                            # Inlinee type index
+	.cv_inline_linetable	2 3 4 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp37:
+	.short	.Ltmp39-.Ltmp38                 # Record length
+.Ltmp38:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	116                             # TypeIndex
+	.short	1                               # Flags
+	.asciz	"x"
+	.p2align	2
+.Ltmp39:
+	.cv_def_range	 .Ltmp2 .Ltmp3, reg, 17
+	.short	.Ltmp41-.Ltmp40                 # Record length
+.Ltmp40:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	4118                            # TypeIndex
+	.short	0                               # Flags
+	.asciz	"bar_local"
+	.p2align	2
+.Ltmp41:
+	.cv_def_range	 .Ltmp2 .Ltmp6, frame_ptr_rel, 52
+	.short	.Ltmp43-.Ltmp42                 # Record length
+.Ltmp42:
+	.short	4429                            # Record kind: S_INLINESITE
+	.long	0                               # PtrParent
+	.long	0                               # PtrEnd
+	.long	4113                            # Inlinee type index
+	.cv_inline_linetable	3 4 4 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp43:
+	.short	.Ltmp45-.Ltmp44                 # Record length
+.Ltmp44:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	116                             # TypeIndex
+	.short	1                               # Flags
+	.asciz	"x"
+	.p2align	2
+.Ltmp45:
+	.cv_def_range	 .Ltmp4 .Ltmp6, reg, 17
+	.short	.Ltmp47-.Ltmp46                 # Record length
+.Ltmp46:
+	.short	4414                            # Record kind: S_LOCAL
+	.long	4118                            # TypeIndex
+	.short	0                               # Flags
+	.asciz	"func_local"
+	.p2align	2
+.Ltmp47:
+	.cv_def_range	 .Ltmp4 .Ltmp6, frame_ptr_rel, 48
+	.short	2                               # Record length
+	.short	4430                            # Record kind: S_INLINESITE_END
+	.short	2                               # Record length
+	.short	4430                            # Record kind: S_INLINESITE_END
+	.short	2                               # Record length
+	.short	4430                            # Record kind: S_INLINESITE_END
+	.short	2                               # Record length
+	.short	4431                            # Record kind: S_PROC_ID_END
+.Ltmp19:
+	.p2align	2
+	.cv_linetable	0, main, .Lfunc_end0
+	.section	.debug$S,"dr"
+	.long	241
+	.long	.Ltmp49-.Ltmp48                 # Subsection size
+.Ltmp48:
+	.short	.Ltmp51-.Ltmp50                 # Record length
+.Ltmp50:
+	.short	4360                            # Record kind: S_UDT
+	.long	4103                            # Type
+	.asciz	"Class1"
+	.p2align	2
+.Ltmp51:
+	.short	.Ltmp53-.Ltmp52                 # Record length
+.Ltmp52:
+	.short	4360                            # Record kind: S_UDT
+	.long	4110                            # Type
+	.asciz	"Namespace2::Class2"
+	.p2align	2
+.Ltmp53:
+.Ltmp49:
+	.p2align	2
+	.cv_filechecksums                       # File index to string table offset subsection
+	.cv_stringtable                         # String table
+	.long	241
+	.long	.Ltmp55-.Ltmp54                 # Subsection size
+.Ltmp54:
+	.short	.Ltmp57-.Ltmp56                 # Record length
+.Ltmp56:
+	.short	4428                            # Record kind: S_BUILDINFO
+	.long	4124                            # LF_BUILDINFO index
+	.p2align	2
+.Ltmp57:
+.Ltmp55:
+	.p2align	2
+	.section	.debug$T,"dr"
+	.p2align	2
+	.long	4                               # Debug section magic
+	# StringId (0x1000)
+	.short	0x12                            # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"Namespace1"                    # StringData
+	.byte	241
+	# ArgList (0x1001)
+	.short	0xa                             # Record length
+	.short	0x1201                          # Record kind: LF_ARGLIST
+	.long	0x1                             # NumArgs
+	.long	0x74                            # Argument: int
+	# Procedure (0x1002)
+	.short	0xe                             # Record length
+	.short	0x1008                          # Record kind: LF_PROCEDURE
+	.long	0x74                            # ReturnType: int
+	.byte	0x0                             # CallingConvention: NearC
+	.byte	0x0                             # FunctionOptions
+	.short	0x1                             # NumParameters
+	.long	0x1001                          # ArgListType: (int)
+	# FuncId (0x1003)
+	.short	0xe                             # Record length
+	.short	0x1601                          # Record kind: LF_FUNC_ID
+	.long	0x1000                          # ParentScope: Namespace1
+	.long	0x1002                          # FunctionType: int (int)
+	.asciz	"foo"                           # Name
+	# Class (0x1004)
+	.short	0x2a                            # Record length
+	.short	0x1504                          # Record kind: LF_CLASS
+	.short	0x0                             # MemberCount
+	.short	0x280                           # Properties ( ForwardReference (0x80) | HasUniqueName (0x200) )
+	.long	0x0                             # FieldList
+	.long	0x0                             # DerivedFrom
+	.long	0x0                             # VShape
+	.short	0x0                             # SizeOf
+	.asciz	"Class1"                        # Name
+	.asciz	".?AVClass1@@"                  # LinkageName
+	.byte	242
+	.byte	241
+	# MemberFunction (0x1005)
+	.short	0x1a                            # Record length
+	.short	0x1009                          # Record kind: LF_MFUNCTION
+	.long	0x74                            # ReturnType: int
+	.long	0x1004                          # ClassType: Class1
+	.long	0x0                             # ThisType
+	.byte	0x0                             # CallingConvention: NearC
+	.byte	0x0                             # FunctionOptions
+	.short	0x1                             # NumParameters
+	.long	0x1001                          # ArgListType: (int)
+	.long	0x0                             # ThisAdjustment
+	# FieldList (0x1006)
+	.short	0xe                             # Record length
+	.short	0x1203                          # Record kind: LF_FIELDLIST
+	.short	0x1511                          # Member kind: OneMethod ( LF_ONEMETHOD )
+	.short	0xb                             # Attrs: Public, Static
+	.long	0x1005                          # Type: int Class1::(int)
+	.asciz	"bar"                           # Name
+	# Class (0x1007)
+	.short	0x2a                            # Record length
+	.short	0x1504                          # Record kind: LF_CLASS
+	.short	0x1                             # MemberCount
+	.short	0x200                           # Properties ( HasUniqueName (0x200) )
+	.long	0x1006                          # FieldList: <field list>
+	.long	0x0                             # DerivedFrom
+	.long	0x0                             # VShape
+	.short	0x1                             # SizeOf
+	.asciz	"Class1"                        # Name
+	.asciz	".?AVClass1@@"                  # LinkageName
+	.byte	242
+	.byte	241
+	# StringId (0x1008)
+	.short	0x12                            # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"/tmp/./b.h"                    # StringData
+	.byte	241
+	# UdtSourceLine (0x1009)
+	.short	0xe                             # Record length
+	.short	0x1606                          # Record kind: LF_UDT_SRC_LINE
+	.long	0x1007                          # UDT: Class1
+	.long	0x1008                          # SourceFile: /tmp/./b.h
+	.long	0x2                             # LineNumber
+	# MemberFuncId (0x100A)
+	.short	0xe                             # Record length
+	.short	0x1602                          # Record kind: LF_MFUNC_ID
+	.long	0x1004                          # ClassType: Class1
+	.long	0x1005                          # FunctionType: int Class1::(int)
+	.asciz	"bar"                           # Name
+	# Class (0x100B)
+	.short	0x42                            # Record length
+	.short	0x1504                          # Record kind: LF_CLASS
+	.short	0x0                             # MemberCount
+	.short	0x280                           # Properties ( ForwardReference (0x80) | HasUniqueName (0x200) )
+	.long	0x0                             # FieldList
+	.long	0x0                             # DerivedFrom
+	.long	0x0                             # VShape
+	.short	0x0                             # SizeOf
+	.asciz	"Namespace2::Class2"            # Name
+	.asciz	".?AVClass2@Namespace2@@"       # LinkageName
+	.byte	243
+	.byte	242
+	.byte	241
+	# MemberFunction (0x100C)
+	.short	0x1a                            # Record length
+	.short	0x1009                          # Record kind: LF_MFUNCTION
+	.long	0x74                            # ReturnType: int
+	.long	0x100b                          # ClassType: Namespace2::Class2
+	.long	0x0                             # ThisType
+	.byte	0x0                             # CallingConvention: NearC
+	.byte	0x0                             # FunctionOptions
+	.short	0x1                             # NumParameters
+	.long	0x1001                          # ArgListType: (int)
+	.long	0x0                             # ThisAdjustment
+	# FieldList (0x100D)
+	.short	0x12                            # Record length
+	.short	0x1203                          # Record kind: LF_FIELDLIST
+	.short	0x1511                          # Member kind: OneMethod ( LF_ONEMETHOD )
+	.short	0xb                             # Attrs: Public, Static
+	.long	0x100c                          # Type: int Namespace2::Class2::(int)
+	.asciz	"func"                          # Name
+	.byte	243
+	.byte	242
+	.byte	241
+	# Class (0x100E)
+	.short	0x42                            # Record length
+	.short	0x1504                          # Record kind: LF_CLASS
+	.short	0x1                             # MemberCount
+	.short	0x200                           # Properties ( HasUniqueName (0x200) )
+	.long	0x100d                          # FieldList: <field list>
+	.long	0x0                             # DerivedFrom
+	.long	0x0                             # VShape
+	.short	0x1                             # SizeOf
+	.asciz	"Namespace2::Class2"            # Name
+	.asciz	".?AVClass2@Namespace2@@"       # LinkageName
+	.byte	243
+	.byte	242
+	.byte	241
+	# StringId (0x100F)
+	.short	0x12                            # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"/tmp/./c.h"                    # StringData
+	.byte	241
+	# UdtSourceLine (0x1010)
+	.short	0xe                             # Record length
+	.short	0x1606                          # Record kind: LF_UDT_SRC_LINE
+	.long	0x100e                          # UDT: Namespace2::Class2
+	.long	0x100f                          # SourceFile: /tmp/./c.h
+	.long	0x2                             # LineNumber
+	# MemberFuncId (0x1011)
+	.short	0x12                            # Record length
+	.short	0x1602                          # Record kind: LF_MFUNC_ID
+	.long	0x100b                          # ClassType: Namespace2::Class2
+	.long	0x100c                          # FunctionType: int Namespace2::Class2::(int)
+	.asciz	"func"                          # Name
+	.byte	243
+	.byte	242
+	.byte	241
+	# Pointer (0x1012)
+	.short	0xa                             # Record length
+	.short	0x1002                          # Record kind: LF_POINTER
+	.long	0x670                           # PointeeType: char*
+	.long	0x1000c                         # Attrs: [ Type: Near64, Mode: Pointer, SizeOf: 8 ]
+	# ArgList (0x1013)
+	.short	0xe                             # Record length
+	.short	0x1201                          # Record kind: LF_ARGLIST
+	.long	0x2                             # NumArgs
+	.long	0x74                            # Argument: int
+	.long	0x1012                          # Argument: char**
+	# Procedure (0x1014)
+	.short	0xe                             # Record length
+	.short	0x1008                          # Record kind: LF_PROCEDURE
+	.long	0x74                            # ReturnType: int
+	.byte	0x0                             # CallingConvention: NearC
+	.byte	0x0                             # FunctionOptions
+	.short	0x2                             # NumParameters
+	.long	0x1013                          # ArgListType: (int, char**)
+	# FuncId (0x1015)
+	.short	0x12                            # Record length
+	.short	0x1601                          # Record kind: LF_FUNC_ID
+	.long	0x0                             # ParentScope
+	.long	0x1014                          # FunctionType: int (int, char**)
+	.asciz	"main"                          # Name
+	.byte	243
+	.byte	242
+	.byte	241
+	# Modifier (0x1016)
+	.short	0xa                             # Record length
+	.short	0x1001                          # Record kind: LF_MODIFIER
+	.long	0x74                            # ModifiedType: int
+	.short	0x2                             # Modifiers ( Volatile (0x2) )
+	.byte	242
+	.byte	241
+	# StringId (0x1017)
+	.short	0xe                             # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"/tmp"                          # StringData
+	.byte	243
+	.byte	242
+	.byte	241
+	# StringId (0x1018)
+	.short	0xe                             # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"a.cpp"                         # StringData
+	.byte	242
+	.byte	241
+	# StringId (0x1019)
+	.short	0xa                             # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.byte	0                               # StringData
+	.byte	243
+	.byte	242
+	.byte	241
+	# StringId (0x101A)
+	.short	0x4e                            # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"/usr/local/google/home/zequanwu/llvm-project/build/release/bin/clang" # StringData
+	.byte	243
+	.byte	242
+	.byte	241
+	# StringId (0x101B)
+	.short	0x9f6                           # Record length
+	.short	0x1605                          # Record kind: LF_STRING_ID
+	.long	0x0                             # Id
+	.asciz	"\"-cc1\" \"-triple\" \"x86_64-pc-windows-msvc19.20.0\" \"-S\" \"-disable-free\" \"-clear-ast-before-backend\" \"-disable-llvm-verifier\" \"-discard-value-names\" \"-mrelocation-model\" \"pic\" \"-pic-level\" \"2\" \"-mframe-pointer=none\" \"-relaxed-aliasing\" \"-fmath-errno\" \"-ffp-contract=on\" \"-fno-rounding-math\" \"-mconstructor-aliases\" \"-funwind-tables=2\" \"-target-cpu\" \"x86-64\" \"-mllvm\" \"-x86-asm-syntax=intel\" \"-tune-cpu\" \"generic\" \"-mllvm\" \"-treat-scalable-fixed-error-as-warning\" \"-D_MT\" \"-flto-visibility-public-std\" \"--dependent-lib=libcmt\" \"--dependent-lib=oldnames\" \"-stack-protector\" \"2\" \"-fms-volatile\" \"-fdiagnostics-format\" \"msvc\" \"-gno-column-info\" \"-gcodeview\" \"-debug-info-kind=constructor\" \"-ffunction-sections\" \"-fcoverage-compilation-dir=/tmp\" \"-resource-dir\" \"/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/DIA SDK/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/atlmfc/include\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/ucrt\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/shared\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/um\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/winrt\" \"-internal-isystem\" \"/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/cppwinrt\" \"-Os\" \"-fdeprecated-macro\" \"-fdebug-compilation-dir=/tmp\" \"-ferror-limit\" \"19\" \"-fno-use-cxa-atexit\" \"-fms-extensions\" \"-fms-compatibility\" \"-fms-compatibility-version=19.20\" \"-std=c++14\" \"-fdelayed-template-parsing\" \"-fcolor-diagnostics\" \"-vectorize-loops\" \"-vectorize-slp\" \"-faddrsig\" \"-x\" \"c++\"" # StringData
+	.byte	242
+	.byte	241
+	# BuildInfo (0x101C)
+	.short	0x1a                            # Record length
+	.short	0x1603                          # Record kind: LF_BUILDINFO
+	.short	0x5                             # NumArgs
+	.long	0x1017                          # Argument: /tmp
+	.long	0x101a                          # Argument: /usr/local/google/home/zequanwu/llvm-project/build/release/bin/clang
+	.long	0x1018                          # Argument: a.cpp
+	.long	0x1019                          # Argument
+	.long	0x101b                          # Argument: "-cc1" "-triple" "x86_64-pc-windows-msvc19.20.0" "-S" "-disable-free" "-clear-ast-before-backend" "-disable-llvm-verifier" "-discard-value-names" "-mrelocation-model" "pic" "-pic-level" "2" "-mframe-pointer=none" "-relaxed-aliasing" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-mllvm" "-x86-asm-syntax=intel" "-tune-cpu" "generic" "-mllvm" "-treat-scalable-fixed-error-as-warning" "-D_MT" "-flto-visibility-public-std" "--dependent-lib=libcmt" "--dependent-lib=oldnames" "-stack-protector" "2" "-fms-volatile" "-fdiagnostics-format" "msvc" "-gno-column-info" "-gcodeview" "-debug-info-kind=constructor" "-ffunction-sections" "-fcoverage-compilation-dir=/tmp" "-resource-dir" "/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0" "-internal-isystem" "/usr/local/google/home/zequanwu/llvm-project/build/release/lib/clang/15.0.0/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/DIA SDK/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/VC/Tools/MSVC/14.26.28801/atlmfc/include" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/ucrt" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/shared" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/um" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/winrt" "-internal-isystem" "/usr/local/google/home/zequanwu/chromium/src/third_party/depot_tools/win_toolchain/vs_files/3bda71a11e/Windows Kits/10/Include/10.0.19041.0/cppwinrt" "-Os" "-fdeprecated-macro" "-fdebug-compilation-dir=/tmp" "-ferror-limit" "19" "-fno-use-cxa-atexit" "-fms-extensions" "-fms-compatibility" "-fms-compatibility-version=19.20" "-std=c++14" "-fdelayed-template-parsing" "-fcolor-diagnostics" "-vectorize-loops" "-vectorize-slp" "-faddrsig" "-x" "c++"
+	.byte	242
+	.byte	241
+	.addrsig
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites_live.lldbinit b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites_live.lldbinit
index eab5061dafbd..2291c7c45271 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites_live.lldbinit
+++ b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/inline_sites_live.lldbinit
@@ -1,7 +1,7 @@
-br set -p BP_bar -f inline_sites_live.cpp
-br set -p BP_foo -f inline_sites_live.cpp
-run
-expression param
-continue
-expression param
-expression local
+br set -p BP_bar -f inline_sites_live.cpp
+br set -p BP_foo -f inline_sites_live.cpp
+run
+expression param
+continue
+expression param
+expression local
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/local-variables-registers.lldbinit b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/local-variables-registers.lldbinit
index feda74856757..ad080da24dab 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/local-variables-registers.lldbinit
+++ b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/local-variables-registers.lldbinit
@@ -1,35 +1,35 @@
-image lookup -a 0x140001000 -v
-image lookup -a 0x140001003 -v
-image lookup -a 0x140001006 -v
-
-image lookup -a 0x140001011 -v
-image lookup -a 0x140001017 -v
-image lookup -a 0x140001019 -v
-image lookup -a 0x14000101e -v
-image lookup -a 0x14000102c -v
-
-image lookup -a 0x140001031 -v
-image lookup -a 0x140001032 -v
-image lookup -a 0x140001033 -v
-image lookup -a 0x140001034 -v
-image lookup -a 0x140001035 -v
-image lookup -a 0x140001036 -v
-image lookup -a 0x140001037 -v
-image lookup -a 0x14000103b -v
-image lookup -a 0x14000103d -v
-image lookup -a 0x14000103f -v
-image lookup -a 0x140001041 -v
-image lookup -a 0x140001043 -v
-image lookup -a 0x140001045 -v
-image lookup -a 0x140001046 -v
-image lookup -a 0x140001047 -v
-image lookup -a 0x140001048 -v
-image lookup -a 0x140001049 -v
-image lookup -a 0x14000104a -v
-image lookup -a 0x14000104b -v
-image lookup -a 0x14000104c -v
-image lookup -a 0x14000104e -v
-image lookup -a 0x14000104f -v
-image lookup -a 0x140001050 -v
-image lookup -a 0x140001051 -v
-exit
+image lookup -a 0x140001000 -v
+image lookup -a 0x140001003 -v
+image lookup -a 0x140001006 -v
+
+image lookup -a 0x140001011 -v
+image lookup -a 0x140001017 -v
+image lookup -a 0x140001019 -v
+image lookup -a 0x14000101e -v
+image lookup -a 0x14000102c -v
+
+image lookup -a 0x140001031 -v
+image lookup -a 0x140001032 -v
+image lookup -a 0x140001033 -v
+image lookup -a 0x140001034 -v
+image lookup -a 0x140001035 -v
+image lookup -a 0x140001036 -v
+image lookup -a 0x140001037 -v
+image lookup -a 0x14000103b -v
+image lookup -a 0x14000103d -v
+image lookup -a 0x14000103f -v
+image lookup -a 0x140001041 -v
+image lookup -a 0x140001043 -v
+image lookup -a 0x140001045 -v
+image lookup -a 0x140001046 -v
+image lookup -a 0x140001047 -v
+image lookup -a 0x140001048 -v
+image lookup -a 0x140001049 -v
+image lookup -a 0x14000104a -v
+image lookup -a 0x14000104b -v
+image lookup -a 0x14000104c -v
+image lookup -a 0x14000104e -v
+image lookup -a 0x14000104f -v
+image lookup -a 0x140001050 -v
+image lookup -a 0x140001051 -v
+exit
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/lookup-by-types.lldbinit b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/lookup-by-types.lldbinit
index 3f639eb2e539..afe3f2c8b943 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/lookup-by-types.lldbinit
+++ b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/lookup-by-types.lldbinit
@@ -1,4 +1,4 @@
-image lookup -type A
-image lookup -type B
-
+image lookup -type A
+image lookup -type B
+
 quit
 \ No newline at end of file
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/subfield_register_simple_type.lldbinit b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/subfield_register_simple_type.lldbinit
index 32758f1fbc51..3dc33fd789da 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/Inputs/subfield_register_simple_type.lldbinit
+++ b/lldb/test/Shell/SymbolFile/NativePDB/Inputs/subfield_register_simple_type.lldbinit
@@ -1,2 +1,2 @@
-image lookup -a 0x40102f -v
-quit
+image lookup -a 0x40102f -v
+quit
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/function-types-classes.cpp b/lldb/test/Shell/SymbolFile/NativePDB/function-types-classes.cpp
index f0fac90e5065..ca2a84de7698 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/function-types-classes.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/function-types-classes.cpp
@@ -113,9 +113,9 @@ auto incomplete = &three<Incomplete*, Incomplete**, const Incomplete*>;
 // CHECK: |-CXXRecordDecl {{.*}} union U
 // CHECK: |-EnumDecl {{.*}} E
 // CHECK: |-CXXRecordDecl {{.*}} struct S
-// CHECK: |-VarDecl {{.*}} a 'S (*)(C *, U &, E &&)'
-// CHECK: |-VarDecl {{.*}} b 'E (*)(const S *, const C &, const U &&)'
-// CHECK: |-VarDecl {{.*}} c 'U (*)(volatile E *, volatile S &, volatile C &&)'
+// CHECK: |-VarDecl {{.*}} a 'S (*)(C *, U &, E &&)'
+// CHECK: |-VarDecl {{.*}} b 'E (*)(const S *, const C &, const U &&)'
+// CHECK: |-VarDecl {{.*}} c 'U (*)(volatile E *, volatile S &, volatile C &&)'
 // CHECK: |-VarDecl {{.*}} d 'C (*)(const volatile U *, const volatile E &, const volatile S &&)'
 // CHECK: |-CXXRecordDecl {{.*}} struct B
 // CHECK: | `-CXXRecordDecl {{.*}} struct A
@@ -125,14 +125,14 @@ auto incomplete = &three<Incomplete*, Incomplete**, const Incomplete*>;
 // CHECK: | | `-CXXRecordDecl {{.*}} struct S
 // CHECK: | `-NamespaceDecl {{.*}} B
 // CHECK: |   `-CXXRecordDecl {{.*}} struct S
-// CHECK: |-VarDecl {{.*}} e 'A::B::S *(*)(B::A::S *, A::C::S &)'
-// CHECK: |-VarDecl {{.*}} f 'A::C::S &(*)(A::B::S *, B::A::S *)'
+// CHECK: |-VarDecl {{.*}} e 'A::B::S *(*)(B::A::S *, A::C::S &)'
+// CHECK: |-VarDecl {{.*}} f 'A::C::S &(*)(A::B::S *, B::A::S *)'
 // CHECK: |-VarDecl {{.*}} g 'B::A::S *(*)(A::C::S &, A::B::S *)'
 // CHECK: |-CXXRecordDecl {{.*}} struct TC<int>
 // CHECK: |-CXXRecordDecl {{.*}} struct TC<TC<int>>
 // CHECK: |-CXXRecordDecl {{.*}} struct TC<A::B::S>
 // CHECK: |-CXXRecordDecl {{.*}} struct TC<void>
-// CHECK: |-VarDecl {{.*}} h 'TC<void> (*)(TC<int>, TC<TC<int>>, TC<A::B::S>)'
+// CHECK: |-VarDecl {{.*}} h 'TC<void> (*)(TC<int>, TC<TC<int>>, TC<A::B::S>)'
 // CHECK: |-VarDecl {{.*}} i 'A::B::S (*)()'
 // CHECK: |-CXXRecordDecl {{.*}} struct Incomplete
 // CHECK: `-VarDecl {{.*}} incomplete 'Incomplete *(*)(Incomplete **, const Incomplete *)'
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
index 402982726965..767149ea18c4 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
@@ -1,34 +1,34 @@
-// clang-format off
-// REQUIRES: system-windows
-
-// RUN: %build -o %t.exe -- %s
-// RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb -f %t.exe -s \
-// RUN:     %p/Inputs/inline_sites_live.lldbinit 2>&1 | FileCheck %s
-
-void use(int) {}
-
-void __attribute__((always_inline)) bar(int param) {
-  use(param); // BP_bar
-}
-
-void __attribute__((always_inline)) foo(int param) {
-  int local = param+1;
-  bar(local);
-  use(param);
-  use(local); // BP_foo
-}
-
-int main(int argc, char** argv) {
-  foo(argc);
-}
-
-// CHECK:      * thread #1, stop reason = breakpoint 1
-// CHECK-NEXT:    frame #0: {{.*}}`main [inlined] bar(param=2)
-// CHECK:      (lldb) expression param
-// CHECK-NEXT: (int) $0 = 2
-// CHECK:      * thread #1, stop reason = breakpoint 2
-// CHECK-NEXT:    frame #0: {{.*}}`main [inlined] foo(param=1)
-// CHECK:      (lldb) expression param
-// CHECK-NEXT: (int) $1 = 1
-// CHECK-NEXT: (lldb) expression local
-// CHECK-NEXT: (int) $2 = 2
+// clang-format off
+// REQUIRES: system-windows
+
+// RUN: %build -o %t.exe -- %s
+// RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb -f %t.exe -s \
+// RUN:     %p/Inputs/inline_sites_live.lldbinit 2>&1 | FileCheck %s
+
+void use(int) {}
+
+void __attribute__((always_inline)) bar(int param) {
+  use(param); // BP_bar
+}
+
+void __attribute__((always_inline)) foo(int param) {
+  int local = param+1;
+  bar(local);
+  use(param);
+  use(local); // BP_foo
+}
+
+int main(int argc, char** argv) {
+  foo(argc);
+}
+
+// CHECK:      * thread #1, stop reason = breakpoint 1
+// CHECK-NEXT:    frame #0: {{.*}}`main [inlined] bar(param=2)
+// CHECK:      (lldb) expression param
+// CHECK-NEXT: (int) $0 = 2
+// CHECK:      * thread #1, stop reason = breakpoint 2
+// CHECK-NEXT:    frame #0: {{.*}}`main [inlined] foo(param=1)
+// CHECK:      (lldb) expression param
+// CHECK-NEXT: (int) $1 = 1
+// CHECK-NEXT: (lldb) expression local
+// CHECK-NEXT: (int) $2 = 2
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/lookup-by-types.cpp b/lldb/test/Shell/SymbolFile/NativePDB/lookup-by-types.cpp
index cd5bbfc30fa0..f3aea8115f38 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/lookup-by-types.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/lookup-by-types.cpp
@@ -1,46 +1,46 @@
-// clang-format off
-
-// RUN: %build -o %t.exe -- %s
-// RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb -f %t.exe -s \
-// RUN:     %p/Inputs/lookup-by-types.lldbinit 2>&1 | FileCheck %s
-
-class B;
-class A {
-public:
-    static const A constA;
-    static A a;
-    static B b;
-    int val = 1;
-};
-class B {
-public:
-    static A a;
-    int val = 2;
-};
-A varA;
-B varB;
-const A A::constA = varA;
-A A::a = varA;
-B A::b = varB;
-A B::a = varA;
-
-int main(int argc, char **argv) {
-  return varA.val + varB.val;
-}
-
-// CHECK:      image lookup -type A
-// CHECK-NEXT: 1 match found in {{.*}}.exe
-// CHECK-NEXT: compiler_type = "class A {
-// CHECK-NEXT:     static const A constA;
-// CHECK-NEXT:     static A a;
-// CHECK-NEXT:     static B b;
-// CHECK-NEXT: public:
-// CHECK-NEXT:     int val;
-// CHECK-NEXT: }"
-// CHECK:      image lookup -type B
-// CHECK-NEXT: 1 match found in {{.*}}.exe
-// CHECK-NEXT:  compiler_type = "class B {
-// CHECK-NEXT:     static A a;
-// CHECK-NEXT: public:
-// CHECK-NEXT:     int val;
-// CHECK-NEXT: }"
+// clang-format off
+
+// RUN: %build -o %t.exe -- %s
+// RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb -f %t.exe -s \
+// RUN:     %p/Inputs/lookup-by-types.lldbinit 2>&1 | FileCheck %s
+
+class B;
+class A {
+public:
+    static const A constA;
+    static A a;
+    static B b;
+    int val = 1;
+};
+class B {
+public:
+    static A a;
+    int val = 2;
+};
+A varA;
+B varB;
+const A A::constA = varA;
+A A::a = varA;
+B A::b = varB;
+A B::a = varA;
+
+int main(int argc, char **argv) {
+  return varA.val + varB.val;
+}
+
+// CHECK:      image lookup -type A
+// CHECK-NEXT: 1 match found in {{.*}}.exe
+// CHECK-NEXT: compiler_type = "class A {
+// CHECK-NEXT:     static const A constA;
+// CHECK-NEXT:     static A a;
+// CHECK-NEXT:     static B b;
+// CHECK-NEXT: public:
+// CHECK-NEXT:     int val;
+// CHECK-NEXT: }"
+// CHECK:      image lookup -type B
+// CHECK-NEXT: 1 match found in {{.*}}.exe
+// CHECK-NEXT:  compiler_type = "class B {
+// CHECK-NEXT:     static A a;
+// CHECK-NEXT: public:
+// CHECK-NEXT:     int val;
+// CHECK-NEXT: }"
diff --git a/lldb/unittests/Breakpoint/CMakeLists.txt b/lldb/unittests/Breakpoint/CMakeLists.txt
index db985bc82dc5..757c2da1a4d9 100644
--- a/lldb/unittests/Breakpoint/CMakeLists.txt
+++ b/lldb/unittests/Breakpoint/CMakeLists.txt
@@ -1,10 +1,10 @@
-add_lldb_unittest(LLDBBreakpointTests
-  BreakpointIDTest.cpp
-  WatchpointAlgorithmsTests.cpp
-
-  LINK_LIBS
-    lldbBreakpoint
-    lldbCore
-  LINK_COMPONENTS
-    Support
-  )
+add_lldb_unittest(LLDBBreakpointTests
+  BreakpointIDTest.cpp
+  WatchpointAlgorithmsTests.cpp
+
+  LINK_LIBS
+    lldbBreakpoint
+    lldbCore
+  LINK_COMPONENTS
+    Support
+  )
diff --git a/llvm/benchmarks/FormatVariadicBM.cpp b/llvm/benchmarks/FormatVariadicBM.cpp
index e351db338730..c03ead400d0d 100644
--- a/llvm/benchmarks/FormatVariadicBM.cpp
+++ b/llvm/benchmarks/FormatVariadicBM.cpp
@@ -1,63 +1,63 @@
-//===- FormatVariadicBM.cpp - formatv() benchmark ---------- --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "benchmark/benchmark.h"
-#include "llvm/Support/FormatVariadic.h"
-#include <algorithm>
-#include <string>
-#include <vector>
-
-using namespace llvm;
-using namespace std;
-
-// Generate a list of format strings that have `NumReplacements` replacements
-// by permuting the replacements and some literal text.
-static vector<string> getFormatStrings(int NumReplacements) {
-  vector<string> Components;
-  for (int I = 0; I < NumReplacements; I++)
-    Components.push_back("{" + to_string(I) + "}");
-  // Intersperse these with some other literal text (_).
-  const string_view Literal = "____";
-  for (char C : Literal)
-    Components.push_back(string(1, C));
-
-  vector<string> Formats;
-  do {
-    string Concat;
-    for (const string &C : Components)
-      Concat += C;
-    Formats.emplace_back(Concat);
-  } while (next_permutation(Components.begin(), Components.end()));
-  return Formats;
-}
-
-// Generate the set of formats to exercise outside the benchmark code.
-static const vector<vector<string>> Formats = {
-    getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
-    getFormatStrings(4), getFormatStrings(5),
-};
-
-// Benchmark formatv() for a variety of format strings and 1-5 replacements.
-static void BM_FormatVariadic(benchmark::State &state) {
-  for (auto _ : state) {
-    for (const string &Fmt : Formats[0])
-      formatv(Fmt.c_str(), 1).str();
-    for (const string &Fmt : Formats[1])
-      formatv(Fmt.c_str(), 1, 2).str();
-    for (const string &Fmt : Formats[2])
-      formatv(Fmt.c_str(), 1, 2, 3).str();
-    for (const string &Fmt : Formats[3])
-      formatv(Fmt.c_str(), 1, 2, 3, 4).str();
-    for (const string &Fmt : Formats[4])
-      formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
-  }
-}
-
-BENCHMARK(BM_FormatVariadic);
-
-BENCHMARK_MAIN();
+//===- FormatVariadicBM.cpp - formatv() benchmark ---------- --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace std;
+
+// Generate a list of format strings that have `NumReplacements` replacements
+// by permuting the replacements and some literal text.
+static vector<string> getFormatStrings(int NumReplacements) {
+  vector<string> Components;
+  for (int I = 0; I < NumReplacements; I++)
+    Components.push_back("{" + to_string(I) + "}");
+  // Intersperse these with some other literal text (_).
+  const string_view Literal = "____";
+  for (char C : Literal)
+    Components.push_back(string(1, C));
+
+  vector<string> Formats;
+  do {
+    string Concat;
+    for (const string &C : Components)
+      Concat += C;
+    Formats.emplace_back(Concat);
+  } while (next_permutation(Components.begin(), Components.end()));
+  return Formats;
+}
+
+// Generate the set of formats to exercise outside the benchmark code.
+static const vector<vector<string>> Formats = {
+    getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
+    getFormatStrings(4), getFormatStrings(5),
+};
+
+// Benchmark formatv() for a variety of format strings and 1-5 replacements.
+static void BM_FormatVariadic(benchmark::State &state) {
+  for (auto _ : state) {
+    for (const string &Fmt : Formats[0])
+      formatv(Fmt.c_str(), 1).str();
+    for (const string &Fmt : Formats[1])
+      formatv(Fmt.c_str(), 1, 2).str();
+    for (const string &Fmt : Formats[2])
+      formatv(Fmt.c_str(), 1, 2, 3).str();
+    for (const string &Fmt : Formats[3])
+      formatv(Fmt.c_str(), 1, 2, 3, 4).str();
+    for (const string &Fmt : Formats[4])
+      formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
+  }
+}
+
+BENCHMARK(BM_FormatVariadic);
+
+BENCHMARK_MAIN();
diff --git a/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp b/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
index 953d9125e11e..fa9c528424c9 100644
--- a/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
+++ b/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
@@ -1,50 +1,50 @@
-#include "benchmark/benchmark.h"
-#include "llvm/IR/Intrinsics.h"
-
-using namespace llvm;
-using namespace Intrinsic;
-
-// Benchmark intrinsic lookup from a variety of targets.
-static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
-  static const char *Builtins[] = {
-      "__builtin_adjust_trampoline",
-      "__builtin_trap",
-      "__builtin_arm_ttest",
-      "__builtin_amdgcn_cubetc",
-      "__builtin_amdgcn_udot2",
-      "__builtin_arm_stc",
-      "__builtin_bpf_compare",
-      "__builtin_HEXAGON_A2_max",
-      "__builtin_lasx_xvabsd_b",
-      "__builtin_mips_dlsa",
-      "__nvvm_floor_f",
-      "__builtin_altivec_vslb",
-      "__builtin_r600_read_tgid_x",
-      "__builtin_riscv_aes64im",
-      "__builtin_s390_vcksm",
-      "__builtin_ve_vl_pvfmksge_Mvl",
-      "__builtin_ia32_axor64",
-      "__builtin_bitrev",
-  };
-  static const char *Targets[] = {"",     "aarch64", "amdgcn", "mips",
-                                  "nvvm", "r600",    "riscv"};
-
-  for (auto _ : state) {
-    for (auto Builtin : Builtins)
-      for (auto Target : Targets)
-        getIntrinsicForClangBuiltin(Target, Builtin);
-  }
-}
-
-static void
-BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
-  // Exercise the worst case by looking for the first builtin for a target
-  // that has a lot of builtins.
-  for (auto _ : state)
-    getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
-}
-
-BENCHMARK(BM_GetIntrinsicForClangBuiltin);
-BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
-
-BENCHMARK_MAIN();
+#include "benchmark/benchmark.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace Intrinsic;
+
+// Benchmark intrinsic lookup from a variety of targets.
+static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
+  static const char *Builtins[] = {
+      "__builtin_adjust_trampoline",
+      "__builtin_trap",
+      "__builtin_arm_ttest",
+      "__builtin_amdgcn_cubetc",
+      "__builtin_amdgcn_udot2",
+      "__builtin_arm_stc",
+      "__builtin_bpf_compare",
+      "__builtin_HEXAGON_A2_max",
+      "__builtin_lasx_xvabsd_b",
+      "__builtin_mips_dlsa",
+      "__nvvm_floor_f",
+      "__builtin_altivec_vslb",
+      "__builtin_r600_read_tgid_x",
+      "__builtin_riscv_aes64im",
+      "__builtin_s390_vcksm",
+      "__builtin_ve_vl_pvfmksge_Mvl",
+      "__builtin_ia32_axor64",
+      "__builtin_bitrev",
+  };
+  static const char *Targets[] = {"",     "aarch64", "amdgcn", "mips",
+                                  "nvvm", "r600",    "riscv"};
+
+  for (auto _ : state) {
+    for (auto Builtin : Builtins)
+      for (auto Target : Targets)
+        getIntrinsicForClangBuiltin(Target, Builtin);
+  }
+}
+
+static void
+BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
+  // Exercise the worst case by looking for the first builtin for a target
+  // that has a lot of builtins.
+  for (auto _ : state)
+    getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
+}
+
+BENCHMARK(BM_GetIntrinsicForClangBuiltin);
+BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
+
+BENCHMARK_MAIN();
diff --git a/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
index 758291274675..7f3bd3bc9eb6 100644
--- a/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
+++ b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
@@ -1,30 +1,30 @@
-//===- GetIntrinsicInfoTableEntries.cpp - IIT signature benchmark ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "benchmark/benchmark.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Intrinsics.h"
-
-using namespace llvm;
-using namespace Intrinsic;
-
-static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
-  SmallVector<IITDescriptor> Table;
-  for (auto _ : state) {
-    for (ID ID = 1; ID < num_intrinsics; ++ID) {
-      // This makes sure the vector does not keep growing, as well as after the
-      // first iteration does not result in additional allocations.
-      Table.clear();
-      getIntrinsicInfoTableEntries(ID, Table);
-    }
-  }
-}
-
-BENCHMARK(BM_GetIntrinsicInfoTableEntries);
-
-BENCHMARK_MAIN();
+//===- GetIntrinsicInfoTableEntries.cpp - IIT signature benchmark ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace Intrinsic;
+
+static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
+  SmallVector<IITDescriptor> Table;
+  for (auto _ : state) {
+    for (ID ID = 1; ID < num_intrinsics; ++ID) {
+      // This makes sure the vector does not keep growing, as well as after the
+      // first iteration does not result in additional allocations.
+      Table.clear();
+      getIntrinsicInfoTableEntries(ID, Table);
+    }
+  }
+}
+
+BENCHMARK(BM_GetIntrinsicInfoTableEntries);
+
+BENCHMARK_MAIN();
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 344a295226f6..08617933519f 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -360,12 +360,6 @@ Best practices for regression tests
 - Try to give values (including variables, blocks and functions) meaningful
   names, and avoid retaining complex names generated by the optimization
   pipeline (such as ``%foo.0.0.0.0.0.0``).
-- If your tests depend on specific input file encodings, beware of line-ending
-  issues across different platforms, and in the project's history. Before you
-  commit tests that depend on explicit encodings, consider adding filetype or
-  specific line-ending annotations to a `<.gitattributes
-  https://git-scm.com/docs/gitattributes#_effects>`_ file in the appropriate
-  directory in the repository.
 
 Extra files
 -----------
diff --git a/llvm/docs/_static/LoopOptWG_invite.ics b/llvm/docs/_static/LoopOptWG_invite.ics
index 7c92e4048cc3..65597d90a9c8 100644
--- a/llvm/docs/_static/LoopOptWG_invite.ics
+++ b/llvm/docs/_static/LoopOptWG_invite.ics
@@ -1,80 +1,80 @@
-BEGIN:VCALENDAR
-PRODID:-//Google Inc//Google Calendar 70.9054//EN
-VERSION:2.0
-CALSCALE:GREGORIAN
-METHOD:PUBLISH
-X-WR-CALNAME:LLVM Loop Optimization Discussion
-X-WR-TIMEZONE:Europe/Berlin
-BEGIN:VTIMEZONE
-TZID:America/New_York
-X-LIC-LOCATION:America/New_York
-BEGIN:DAYLIGHT
-TZOFFSETFROM:-0500
-TZOFFSETTO:-0400
-TZNAME:EDT
-DTSTART:19700308T020000
-RRULE:FREQ=YEARLY;BYMONTH=3;BYDAY=2SU
-END:DAYLIGHT
-BEGIN:STANDARD
-TZOFFSETFROM:-0400
-TZOFFSETTO:-0500
-TZNAME:EST
-DTSTART:19701101T020000
-RRULE:FREQ=YEARLY;BYMONTH=11;BYDAY=1SU
-END:STANDARD
-END:VTIMEZONE
-BEGIN:VEVENT
-DTSTART;TZID=America/New_York:20240904T110000
-DTEND;TZID=America/New_York:20240904T120000
-RRULE:FREQ=MONTHLY;BYDAY=1WE
-DTSTAMP:20240821T160951Z
-UID:58h3f0kd3aooohmeii0johh23c@google.com
-X-GOOGLE-CONFERENCE:https://meet.google.com/fmz-gspu-odg
-CREATED:20240821T151507Z
-DESCRIPTION:LLVM Loop Optimization Discussion<br>Video call link: <a href="
- https://meet.google.com/fmz-gspu-odg" target="_blank">https://meet.google.c
- om/fmz-gspu-odg</a><br>Agenda/Minutes/Discussion: <a href="https://docs.goo
- gle.com/document/d/1sdzoyB11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sh
- aring" class="pastedDriveLink-0">https://docs.google.com/document/d/1sdzoyB
- 11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sharing</a>\n\n-::~:~::~:~:~
- :~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-\
- nJoin with Google Meet: https://meet.google.com/fmz-gspu-odg\nOr dial: (DE)
-  +49 40 8081617343 PIN: 948106286#\nMore phone numbers: https://tel.meet/fm
- z-gspu-odg?pin=6273693382184&hs=7\n\nLearn more about Meet at: https://supp
- ort.google.com/a/users/answer/9282720\n\nPlease do not edit this section.\n
- -::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~
- :~:~::~:~::-
-LAST-MODIFIED:20240821T160941Z
-SEQUENCE:0
-STATUS:CONFIRMED
-SUMMARY:LLVM Loop Optimization Discussion
-TRANSP:OPAQUE
-END:VEVENT
-BEGIN:VEVENT
-DTSTART;TZID=America/New_York:20240904T110000
-DTEND;TZID=America/New_York:20240904T120000
-DTSTAMP:20240821T160951Z
-UID:58h3f0kd3aooohmeii0johh23c@google.com
-X-GOOGLE-CONFERENCE:https://meet.google.com/fmz-gspu-odg
-RECURRENCE-ID;TZID=America/New_York:20240904T110000
-CREATED:20240821T151507Z
-DESCRIPTION:LLVM Loop Optimization Discussion<br>Video call link: <a href="
- https://meet.google.com/fmz-gspu-odg" target="_blank">https://meet.google.c
- om/fmz-gspu-odg</a><br>Agenda/Minutes/Discussion: <a href="https://docs.goo
- gle.com/document/d/1sdzoyB11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sh
- aring" class="pastedDriveLink-0">https://docs.google.com/document/d/1sdzoyB
- 11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sharing</a>\n\n-::~:~::~:~:~
- :~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-\
- nJoin with Google Meet: https://meet.google.com/fmz-gspu-odg\nOr dial: (DE)
-  +49 40 8081617343 PIN: 948106286#\nMore phone numbers: https://tel.meet/fm
- z-gspu-odg?pin=6273693382184&hs=7\n\nLearn more about Meet at: https://supp
- ort.google.com/a/users/answer/9282720\n\nPlease do not edit this section.\n
- -::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~
- :~:~::~:~::-
-LAST-MODIFIED:20240821T160941Z
-SEQUENCE:0
-STATUS:CONFIRMED
-SUMMARY:LLVM Loop Optimization Discussion
-TRANSP:OPAQUE
-END:VEVENT
-END:VCALENDAR
+BEGIN:VCALENDAR
+PRODID:-//Google Inc//Google Calendar 70.9054//EN
+VERSION:2.0
+CALSCALE:GREGORIAN
+METHOD:PUBLISH
+X-WR-CALNAME:LLVM Loop Optimization Discussion
+X-WR-TIMEZONE:Europe/Berlin
+BEGIN:VTIMEZONE
+TZID:America/New_York
+X-LIC-LOCATION:America/New_York
+BEGIN:DAYLIGHT
+TZOFFSETFROM:-0500
+TZOFFSETTO:-0400
+TZNAME:EDT
+DTSTART:19700308T020000
+RRULE:FREQ=YEARLY;BYMONTH=3;BYDAY=2SU
+END:DAYLIGHT
+BEGIN:STANDARD
+TZOFFSETFROM:-0400
+TZOFFSETTO:-0500
+TZNAME:EST
+DTSTART:19701101T020000
+RRULE:FREQ=YEARLY;BYMONTH=11;BYDAY=1SU
+END:STANDARD
+END:VTIMEZONE
+BEGIN:VEVENT
+DTSTART;TZID=America/New_York:20240904T110000
+DTEND;TZID=America/New_York:20240904T120000
+RRULE:FREQ=MONTHLY;BYDAY=1WE
+DTSTAMP:20240821T160951Z
+UID:58h3f0kd3aooohmeii0johh23c@google.com
+X-GOOGLE-CONFERENCE:https://meet.google.com/fmz-gspu-odg
+CREATED:20240821T151507Z
+DESCRIPTION:LLVM Loop Optimization Discussion<br>Video call link: <a href="
+ https://meet.google.com/fmz-gspu-odg" target="_blank">https://meet.google.c
+ om/fmz-gspu-odg</a><br>Agenda/Minutes/Discussion: <a href="https://docs.goo
+ gle.com/document/d/1sdzoyB11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sh
+ aring" class="pastedDriveLink-0">https://docs.google.com/document/d/1sdzoyB
+ 11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sharing</a>\n\n-::~:~::~:~:~
+ :~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-\
+ nJoin with Google Meet: https://meet.google.com/fmz-gspu-odg\nOr dial: (DE)
+  +49 40 8081617343 PIN: 948106286#\nMore phone numbers: https://tel.meet/fm
+ z-gspu-odg?pin=6273693382184&hs=7\n\nLearn more about Meet at: https://supp
+ ort.google.com/a/users/answer/9282720\n\nPlease do not edit this section.\n
+ -::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~
+ :~:~::~:~::-
+LAST-MODIFIED:20240821T160941Z
+SEQUENCE:0
+STATUS:CONFIRMED
+SUMMARY:LLVM Loop Optimization Discussion
+TRANSP:OPAQUE
+END:VEVENT
+BEGIN:VEVENT
+DTSTART;TZID=America/New_York:20240904T110000
+DTEND;TZID=America/New_York:20240904T120000
+DTSTAMP:20240821T160951Z
+UID:58h3f0kd3aooohmeii0johh23c@google.com
+X-GOOGLE-CONFERENCE:https://meet.google.com/fmz-gspu-odg
+RECURRENCE-ID;TZID=America/New_York:20240904T110000
+CREATED:20240821T151507Z
+DESCRIPTION:LLVM Loop Optimization Discussion<br>Video call link: <a href="
+ https://meet.google.com/fmz-gspu-odg" target="_blank">https://meet.google.c
+ om/fmz-gspu-odg</a><br>Agenda/Minutes/Discussion: <a href="https://docs.goo
+ gle.com/document/d/1sdzoyB11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sh
+ aring" class="pastedDriveLink-0">https://docs.google.com/document/d/1sdzoyB
+ 11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit?usp=sharing</a>\n\n-::~:~::~:~:~
+ :~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-\
+ nJoin with Google Meet: https://meet.google.com/fmz-gspu-odg\nOr dial: (DE)
+  +49 40 8081617343 PIN: 948106286#\nMore phone numbers: https://tel.meet/fm
+ z-gspu-odg?pin=6273693382184&hs=7\n\nLearn more about Meet at: https://supp
+ ort.google.com/a/users/answer/9282720\n\nPlease do not edit this section.\n
+ -::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~
+ :~:~::~:~::-
+LAST-MODIFIED:20240821T160941Z
+SEQUENCE:0
+STATUS:CONFIRMED
+SUMMARY:LLVM Loop Optimization Discussion
+TRANSP:OPAQUE
+END:VEVENT
+END:VCALENDAR
diff --git a/llvm/include/llvm-c/Disassembler.h b/llvm/include/llvm-c/Disassembler.h
index b1cb35da6687..4bc6b04dd6ea 100644
--- a/llvm/include/llvm-c/Disassembler.h
+++ b/llvm/include/llvm-c/Disassembler.h
@@ -79,8 +79,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options);
 #define LLVMDisassembler_Option_AsmPrinterVariant 4
 /* The option to set comment on instructions */
 #define LLVMDisassembler_Option_SetInstrComments 8
-  /* The option to print latency information alongside instructions */
+/* The option to print latency information alongside instructions */
 #define LLVMDisassembler_Option_PrintLatency 16
+/* The option to print in color */
+#define LLVMDisassembler_Option_Color 32
 
 /**
  * Dispose of a disassembler context.
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
index ae3ceed447c4..5be05bc80c49 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
@@ -113,8 +113,15 @@ public:
   virtual ~DGNode() = default;
   /// \Returns the number of unscheduled successors.
   unsigned getNumUnscheduledSuccs() const { return UnscheduledSuccs; }
+  void decrUnscheduledSuccs() {
+    assert(UnscheduledSuccs > 0 && "Counting error!");
+    --UnscheduledSuccs;
+  }
+  /// \Returns true if all dependent successors have been scheduled.
+  bool ready() const { return UnscheduledSuccs == 0; }
   /// \Returns true if this node has been scheduled.
   bool scheduled() const { return Scheduled; }
+  void setScheduled(bool NewVal) { Scheduled = NewVal; }
   /// \Returns true if this is before \p Other in program order.
   bool comesBefore(const DGNode *Other) { return I->comesBefore(Other->I); }
   using iterator = PredIterator;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
new file mode 100644
index 000000000000..08972d460b40
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -0,0 +1,126 @@
+//===- Scheduler.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the bottom-up list scheduler used by the vectorizer. It is used for
+// checking the legality of vectorization and for scheduling instructions in
+// such a way that makes vectorization possible, if legal.
+//
+// The legality check is performed by `trySchedule(Instrs)`, which will try to
+// schedule the IR until all instructions in `Instrs` can be scheduled together
+// back-to-back. If this fails then it is illegal to vectorize `Instrs`.
+//
+// Internally the scheduler uses the vectorizer-specific DependencyGraph class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H
+
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h"
+#include <queue>
+
+namespace llvm::sandboxir {
+
+class PriorityCmp {
+public:
+  bool operator()(const DGNode *N1, const DGNode *N2) {
+    // TODO: This should be a hierarchical comparator.
+    return N1->getInstruction()->comesBefore(N2->getInstruction());
+  }
+};
+
+/// The list holding nodes that are ready to schedule. Used by the scheduler.
+class ReadyListContainer {
+  PriorityCmp Cmp;
+  /// Control/Other dependencies are not modeled by the DAG to save memory.
+  /// These have to be modeled in the ready list for correctness.
+  /// This means that the list will hold back nodes that need to meet such
+  /// unmodeled dependencies.
+  std::priority_queue<DGNode *, std::vector<DGNode *>, PriorityCmp> List;
+
+public:
+  ReadyListContainer() : List(Cmp) {}
+  void insert(DGNode *N) { List.push(N); }
+  DGNode *pop() {
+    auto *Back = List.top();
+    List.pop();
+    return Back;
+  }
+  bool empty() const { return List.empty(); }
+#ifndef NDEBUG
+  void dump(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dump() const;
+#endif // NDEBUG
+};
+
+/// The nodes that need to be scheduled back-to-back in a single scheduling
+/// cycle form a SchedBundle.
+class SchedBundle {
+public:
+  using ContainerTy = SmallVector<DGNode *, 4>;
+
+private:
+  ContainerTy Nodes;
+
+public:
+  SchedBundle() = default;
+  SchedBundle(ContainerTy &&Nodes) : Nodes(std::move(Nodes)) {}
+  using iterator = ContainerTy::iterator;
+  using const_iterator = ContainerTy::const_iterator;
+  iterator begin() { return Nodes.begin(); }
+  iterator end() { return Nodes.end(); }
+  const_iterator begin() const { return Nodes.begin(); }
+  const_iterator end() const { return Nodes.end(); }
+  /// \Returns the bundle node that comes before the others in program order.
+  DGNode *getTop() const;
+  /// \Returns the bundle node that comes after the others in program order.
+  DGNode *getBot() const;
+  /// Move all bundle instructions to \p Where back-to-back.
+  void cluster(BasicBlock::iterator Where);
+#ifndef NDEBUG
+  void dump(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+/// The list scheduler.
+class Scheduler {
+  ReadyListContainer ReadyList;
+  DependencyGraph DAG;
+  std::optional<BasicBlock::iterator> ScheduleTopItOpt;
+  SmallVector<std::unique_ptr<SchedBundle>> Bndls;
+
+  /// \Returns a scheduling bundle containing \p Instrs.
+  SchedBundle *createBundle(ArrayRef<Instruction *> Instrs);
+  /// Schedule nodes until we can schedule \p Instrs back-to-back.
+  bool tryScheduleUntil(ArrayRef<Instruction *> Instrs);
+  /// Schedules all nodes in \p Bndl, marks them as scheduled, updates the
+  /// UnscheduledSuccs counter of all dependency predecessors, and adds any of
+  /// them that become ready to the ready list.
+  void scheduleAndUpdateReadyList(SchedBundle &Bndl);
+
+  /// Disable copies.
+  Scheduler(const Scheduler &) = delete;
+  Scheduler &operator=(const Scheduler &) = delete;
+
+public:
+  Scheduler(AAResults &AA) : DAG(AA) {}
+  ~Scheduler() {}
+
+  bool trySchedule(ArrayRef<Instruction *> Instrs);
+
+#ifndef NDEBUG
+  void dump(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+} // namespace llvm::sandboxir
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 327e7f7f8a1e..bf4c707cca06 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2849,7 +2849,7 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI,
 
   if (isElf) {
     MCSymbolELF *LinkedToSym = dyn_cast<MCSymbolELF>(CurrentFnSym);
-    int Flags = F.hasComdat() ? (unsigned)ELF::SHF_GROUP : 0;
+    int Flags = F.hasComdat() ? static_cast<int>(ELF::SHF_GROUP) : 0;
 
     JumpTableSizesSection = OutContext.getELFSection(
         sectionName, ELF::SHT_LLVM_JT_SIZES, Flags, 0, GroupName, F.hasComdat(),
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 5e5a163c2902..f5d6c6bb5618 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -277,6 +277,12 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
     SmallVector<char, 64> InsnStr;
     raw_svector_ostream OS(InsnStr);
     formatted_raw_ostream FormattedOS(OS);
+
+    if (DC->getOptions() & LLVMDisassembler_Option_Color) {
+      FormattedOS.enable_colors(true);
+      IP->setUseColor(true);
+    }
+
     IP->printInst(&Inst, PC, AnnotationsStr, *DC->getSubtargetInfo(),
                   FormattedOS);
 
@@ -343,5 +349,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
     DC->addOptions(LLVMDisassembler_Option_PrintLatency);
     Options &= ~LLVMDisassembler_Option_PrintLatency;
   }
+  if (Options & LLVMDisassembler_Option_Color) {
+    LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
+    DC->addOptions(LLVMDisassembler_Option_Color);
+    Options &= ~LLVMDisassembler_Option_Color;
+  }
   return (Options == 0);
 }
diff --git a/llvm/lib/Support/rpmalloc/CACHE.md b/llvm/lib/Support/rpmalloc/CACHE.md
index 645093026deb..052320baf532 100644
--- a/llvm/lib/Support/rpmalloc/CACHE.md
+++ b/llvm/lib/Support/rpmalloc/CACHE.md
@@ -1,19 +1,19 @@
-# Thread caches
-rpmalloc has a thread cache of free memory blocks which can be used in allocations without interfering with other threads or going to system to map more memory, as well as a global cache shared by all threads to let spans of memory pages flow between threads. Configuring the size of these caches can be crucial to obtaining good performance while minimizing memory overhead blowup. Below is a simple case study using the benchmark tool to compare different thread cache configurations for rpmalloc.
-
-The rpmalloc thread cache is configured to be unlimited, performance oriented as meaning default values, size oriented where both thread cache and global cache is reduced significantly, or disabled where both thread and global caches are disabled and completely free pages are directly unmapped.
-
-The benchmark is configured to run threads allocating 150000 blocks distributed in the `[16, 16000]` bytes range with a linear falloff probability. It runs 1000 loops, and every iteration 75000 blocks (50%) are freed and allocated in a scattered pattern. There are no cross thread allocations/deallocations. Parameters: `benchmark n 0 0 0 1000 150000 75000 16 16000`. The benchmarks are run on an Ubuntu 16.10 machine with 8 cores (4 physical, HT) and 12GiB RAM.
-
-The benchmark also includes results for the standard library malloc implementation as a reference for comparison with the nocache setting.
-
-![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=387883204&format=image)
-![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=1644710241&format=image)
-
-For single threaded case the unlimited cache and performance oriented cache settings have identical performance and memory overhead, indicating that the memory pages fit in the combined thread and global cache. As number of threads increase to 2-4 threads, the performance settings have slightly higher performance which can seem odd at first, but can be explained by low contention on the global cache where some memory pages can flow between threads without stalling, reducing the overall number of calls to map new memory pages (also indicated by the slightly lower memory overhead). 
-
-As threads increase even more to 5-10 threads, the increased contention and eventual limit of global cache cause the unlimited setting to gain a slight advantage in performance. As expected the memory overhead remains constant for unlimited caches, while going down for performance setting when number of threads increases.
-
-The size oriented setting maintain good performance compared to the standard library while reducing the memory overhead compared to the performance setting with a decent amount.
-
-The nocache setting still outperforms the reference standard library allocator for workloads up to 6 threads while maintaining a near zero memory overhead, which is even slightly lower than the standard library. For use case scenarios where number of allocation of each size class is lower the overhead in rpmalloc from the 64KiB span size will of course increase.
+# Thread caches
+rpmalloc has a thread cache of free memory blocks which can be used in allocations without interfering with other threads or going to system to map more memory, as well as a global cache shared by all threads to let spans of memory pages flow between threads. Configuring the size of these caches can be crucial to obtaining good performance while minimizing memory overhead blowup. Below is a simple case study using the benchmark tool to compare different thread cache configurations for rpmalloc.
+
+The rpmalloc thread cache is configured to be unlimited, performance oriented as meaning default values, size oriented where both thread cache and global cache is reduced significantly, or disabled where both thread and global caches are disabled and completely free pages are directly unmapped.
+
+The benchmark is configured to run threads allocating 150000 blocks distributed in the `[16, 16000]` bytes range with a linear falloff probability. It runs 1000 loops, and every iteration 75000 blocks (50%) are freed and allocated in a scattered pattern. There are no cross thread allocations/deallocations. Parameters: `benchmark n 0 0 0 1000 150000 75000 16 16000`. The benchmarks are run on an Ubuntu 16.10 machine with 8 cores (4 physical, HT) and 12GiB RAM.
+
+The benchmark also includes results for the standard library malloc implementation as a reference for comparison with the nocache setting.
+
+![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=387883204&format=image)
+![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=1644710241&format=image)
+
+For single threaded case the unlimited cache and performance oriented cache settings have identical performance and memory overhead, indicating that the memory pages fit in the combined thread and global cache. As number of threads increase to 2-4 threads, the performance settings have slightly higher performance which can seem odd at first, but can be explained by low contention on the global cache where some memory pages can flow between threads without stalling, reducing the overall number of calls to map new memory pages (also indicated by the slightly lower memory overhead). 
+
+As threads increase even more to 5-10 threads, the increased contention and eventual limit of global cache cause the unlimited setting to gain a slight advantage in performance. As expected the memory overhead remains constant for unlimited caches, while going down for performance setting when number of threads increases.
+
+The size oriented setting maintain good performance compared to the standard library while reducing the memory overhead compared to the performance setting with a decent amount.
+
+The nocache setting still outperforms the reference standard library allocator for workloads up to 6 threads while maintaining a near zero memory overhead, which is even slightly lower than the standard library. For use case scenarios where number of allocation of each size class is lower the overhead in rpmalloc from the 64KiB span size will of course increase.
diff --git a/llvm/lib/Support/rpmalloc/README.md b/llvm/lib/Support/rpmalloc/README.md
index 2233df9da42d..916bca0118d8 100644
--- a/llvm/lib/Support/rpmalloc/README.md
+++ b/llvm/lib/Support/rpmalloc/README.md
@@ -1,220 +1,220 @@
-# rpmalloc - General Purpose Memory Allocator
-This library provides a cross platform lock free thread caching 16-byte aligned memory allocator implemented in C.
-This is a fork of rpmalloc 1.4.5.
-
-Platforms currently supported:
-
-- Windows
-- MacOS
-- iOS
-- Linux
-- Android
-- Haiku
-
-The code should be easily portable to any platform with atomic operations and an mmap-style virtual memory management API. The API used to map/unmap memory pages can be configured in runtime to a custom implementation and mapping granularity/size.
-
-This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. Or, if you choose, you can use it under the MIT license.
-
-# Performance
-We believe rpmalloc is faster than most popular memory allocators like tcmalloc, hoard, ptmalloc3 and others without causing extra allocated memory overhead in the thread caches compared to these allocators. We also believe the implementation to be easier to read and modify compared to these allocators, as it is a single source file of ~3000 lines of C code. All allocations have a natural 16-byte alignment.
-
-Contained in a parallel repository is a benchmark utility that performs interleaved unaligned allocations and deallocations (both in-thread and cross-thread) in multiple threads. It measures number of memory operations performed per CPU second, as well as memory overhead by comparing the virtual memory mapped with the number of bytes requested in allocation calls. The setup of number of thread, cross-thread deallocation rate and allocation size limits is configured by command line arguments.
-
-https://github.com/mjansson/rpmalloc-benchmark
-
-Below is an example performance comparison chart of rpmalloc and other popular allocator implementations, with default configurations used.
-
-![Ubuntu 16.10, random [16, 8000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=301017877&format=image)
-
-The benchmark producing these numbers were run on an Ubuntu 16.10 machine with 8 logical cores (4 physical, HT). The actual numbers are not to be interpreted as absolute performance figures, but rather as relative comparisons between the different allocators. For additional benchmark results, see the [BENCHMARKS](BENCHMARKS.md) file.
-
-Configuration of the thread and global caches can be important depending on your use pattern. See [CACHE](CACHE.md) for a case study and some comments/guidelines.
-
-# Required functions
-
-Before calling any other function in the API, you __MUST__ call the initialization function, either __rpmalloc_initialize__ or __rpmalloc_initialize_config__, or you will get undefined behaviour when calling other rpmalloc entry point.
-
-Before terminating your use of the allocator, you __SHOULD__ call __rpmalloc_finalize__ in order to release caches and unmap virtual memory, as well as prepare the allocator for global scope cleanup at process exit or dynamic library unload depending on your use case.
-
-# Using
-The easiest way to use the library is simply adding __rpmalloc.[h|c]__ to your project and compile them along with your sources. This contains only the rpmalloc specific entry points and does not provide internal hooks to process and/or thread creation at the moment. You are required to call these functions from your own code in order to initialize and finalize the allocator in your process and threads:
-
-__rpmalloc_initialize__ : Call at process start to initialize the allocator
-
-__rpmalloc_initialize_config__ : Optional entry point to call at process start to initialize the allocator with a custom memory mapping backend, memory page size and mapping granularity.
-
-__rpmalloc_finalize__: Call at process exit to finalize the allocator
-
-__rpmalloc_thread_initialize__: Call at each thread start to initialize the thread local data for the allocator
-
-__rpmalloc_thread_finalize__: Call at each thread exit to finalize and release thread cache back to global cache
-
-__rpmalloc_config__: Get the current runtime configuration of the allocator
-
-Then simply use the __rpmalloc__/__rpfree__ and the other malloc style replacement functions. Remember all allocations are 16-byte aligned, so no need to call the explicit rpmemalign/rpaligned_alloc/rpposix_memalign functions unless you need greater alignment, they are simply wrappers to make it easier to replace in existing code.
-
-If you wish to override the standard library malloc family of functions and have automatic initialization/finalization of process and threads, define __ENABLE_OVERRIDE__ to non-zero which will include the `malloc.c` file in compilation of __rpmalloc.c__, and then rebuild the library or your project where you added the rpmalloc source. If you compile rpmalloc as a separate library you must make the linker use the override symbols from the library by referencing at least one symbol. The easiest way is to simply include `rpmalloc.h` in at least one source file and call `rpmalloc_linker_reference` somewhere - it's a dummy empty function. On Windows platforms and C++ overrides you have to `#include <rpnew.h>` in at least one source file and also manually handle the initialize/finalize of the process and all threads. The list of libc entry points replaced may not be complete, use libc/stdc++ replacement only as a convenience for testing the library on an existing code base, not a final solution.
-
-For explicit first class heaps, see the __rpmalloc_heap_*__ API under [first class heaps](#first-class-heaps) section, requiring __RPMALLOC_FIRST_CLASS_HEAPS__ tp be defined to 1.
-
-# Building
-To compile as a static library run the configure python script which generates a Ninja build script, then build using ninja. The ninja build produces two static libraries, one named `rpmalloc` and one named `rpmallocwrap`, where the latter includes the libc entry point overrides.
-
-The configure + ninja build also produces two shared object/dynamic libraries. The `rpmallocwrap` shared library can be used with LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject in a preexisting binary, replacing any malloc/free family of function calls. This is only implemented for Linux and macOS targets. The list of libc entry points replaced may not be complete, use preloading as a convenience for testing the library on an existing binary, not a final solution. The dynamic library also provides automatic init/fini of process and threads for all platforms.
-
-The latest stable release is available in the master branch. For latest development code, use the develop branch.
-
-# Cache configuration options
-Free memory pages are cached both per thread and in a global cache for all threads. The size of the thread caches is determined by an adaptive scheme where each cache is limited by a percentage of the maximum allocation count of the corresponding size class. The size of the global caches is determined by a multiple of the maximum of all thread caches. The factors controlling the cache sizes can be set by editing the individual defines in the `rpmalloc.c` source file for fine tuned control.
-
-__ENABLE_UNLIMITED_CACHE__: By default defined to 0, set to 1 to make all caches infinite, i.e never release spans to global cache unless thread finishes and never unmap memory pages back to the OS. Highest performance but largest memory overhead.
-
-__ENABLE_UNLIMITED_GLOBAL_CACHE__: By default defined to 0, set to 1 to make global caches infinite, i.e never unmap memory pages back to the OS.
-
-__ENABLE_UNLIMITED_THREAD_CACHE__: By default defined to 0, set to 1 to make thread caches infinite, i.e never release spans to global cache unless thread finishes.
-
-__ENABLE_GLOBAL_CACHE__: By default defined to 1, enables the global cache shared between all threads. Set to 0 to disable the global cache and directly unmap pages evicted from the thread cache.
-
-__ENABLE_THREAD_CACHE__: By default defined to 1, enables the per-thread cache. Set to 0 to disable the thread cache and directly unmap pages no longer in use (also disables the global cache).
-
-__ENABLE_ADAPTIVE_THREAD_CACHE__: Introduces a simple heuristics in the thread cache size, keeping 25% of the high water mark for each span count class.
-
-# Other configuration options
-Detailed statistics are available if __ENABLE_STATISTICS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. This will cause a slight overhead in runtime to collect statistics for each memory operation, and will also add 4 bytes overhead per allocation to track sizes.
-
-Integer safety checks on all calls are enabled if __ENABLE_VALIDATE_ARGS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. If enabled, size arguments to the global entry points are verified not to cause integer overflows in calculations.
-
-Asserts are enabled if __ENABLE_ASSERTS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`.
-
-To include __malloc.c__ in compilation and provide overrides of standard library malloc entry points define __ENABLE_OVERRIDE__ to 1. To enable automatic initialization of finalization of process and threads in order to preload the library into executables using standard library malloc, define __ENABLE_PRELOAD__ to 1.
-
-To enable the runtime configurable memory page and span sizes, define __RPMALLOC_CONFIGURABLE__ to 1. By default, memory page size is determined by system APIs and memory span size is set to 64KiB.
-
-To enable support for first class heaps, define __RPMALLOC_FIRST_CLASS_HEAPS__ to 1. By default, the first class heap API is disabled.
-
-# Huge pages
-The allocator has support for huge/large pages on Windows, Linux and MacOS. To enable it, pass a non-zero value in the config value `enable_huge_pages` when initializing the allocator with `rpmalloc_initialize_config`. If the system does not support huge pages it will be automatically disabled. You can query the status by looking at `enable_huge_pages` in the config returned from a call to `rpmalloc_config` after initialization is done.
-
-# Quick overview
-The allocator is similar in spirit to tcmalloc from the [Google Performance Toolkit](https://github.com/gperftools/gperftools). It uses separate heaps for each thread and partitions memory blocks according to a preconfigured set of size classes, up to 2MiB. Larger blocks are mapped and unmapped directly. Allocations for different size classes will be served from different set of memory pages, each "span" of pages is dedicated to one size class. Spans of pages can flow between threads when the thread cache overflows and are released to a global cache, or when the thread ends. Unlike tcmalloc, single blocks do not flow between threads, only entire spans of pages.
-
-# Implementation details
-The allocator is based on a fixed but configurable page alignment (defaults to 64KiB) and 16 byte block alignment, where all runs of memory pages (spans) are mapped to this alignment boundary. On Windows this is automatically guaranteed up to 64KiB by the VirtualAlloc granularity, and on mmap systems it is achieved by oversizing the mapping and aligning the returned virtual memory address to the required boundaries. By aligning to a fixed size the free operation can locate the header of the memory span without having to do a table lookup (as tcmalloc does) by simply masking out the low bits of the address (for 64KiB this would be the low 16 bits).
-
-Memory blocks are divided into three categories. For 64KiB span size/alignment the small blocks are [16, 1024] bytes, medium blocks (1024, 32256] bytes, and large blocks (32256, 2097120] bytes. The three categories are further divided in size classes. If the span size is changed, the small block classes remain but medium blocks go from (1024, span size] bytes.
-
-Small blocks have a size class granularity of 16 bytes each in 64 buckets. Medium blocks have a granularity of 512 bytes, 61 buckets (default). Large blocks have the same granularity as the configured span size (default 64KiB). All allocations are fitted to these size class boundaries (an allocation of 36 bytes will allocate a block of 48 bytes). Each small and medium size class has an associated span (meaning a contiguous set of memory pages) configuration describing how many pages the size class will allocate each time the cache is empty and a new allocation is requested.
-
-Spans for small and medium blocks are cached in four levels to avoid calls to map/unmap memory pages. The first level is a per thread single active span for each size class. The second level is a per thread list of partially free spans for each size class. The third level is a per thread list of free spans. The fourth level is a global list of free spans.
-
-Each span for a small and medium size class keeps track of how many blocks are allocated/free, as well as a list of which blocks that are free for allocation. To avoid locks, each span is completely owned by the allocating thread, and all cross-thread deallocations will be deferred to the owner thread through a separate free list per span.
-
-Large blocks, or super spans, are cached in two levels. The first level is a per thread list of free super spans. The second level is a global list of free super spans.
-
-# Memory mapping
-By default the allocator uses OS APIs to map virtual memory pages as needed, either `VirtualAlloc` on Windows or `mmap` on POSIX systems. If you want to use your own custom memory mapping provider you can use __rpmalloc_initialize_config__ and pass function pointers to map and unmap virtual memory. These function should reserve and free the requested number of bytes.
-
-The returned memory address from the memory map function MUST be aligned to the memory page size and the memory span size (which ever is larger), both of which is configurable. Either provide the page and span sizes during initialization using __rpmalloc_initialize_config__, or use __rpmalloc_config__ to find the required alignment which is equal to the maximum of page and span size. The span size MUST be a power of two in [4096, 262144] range, and be a multiple or divisor of the memory page size.
-
-Memory mapping requests are always done in multiples of the memory page size. You can specify a custom page size when initializing rpmalloc with __rpmalloc_initialize_config__, or pass 0 to let rpmalloc determine the system memory page size using OS APIs. The page size MUST be a power of two.
-
-To reduce system call overhead, memory spans are mapped in batches controlled by the `span_map_count` configuration variable (which defaults to the `DEFAULT_SPAN_MAP_COUNT` value if 0, which in turn is sized according to the cache configuration define, defaulting to 64). If the memory page size is larger than the span size, the number of spans to map in a single call will be adjusted to guarantee a multiple of the page size, and the spans will be kept mapped until the entire span range can be unmapped in one call (to avoid trying to unmap partial pages).
-
-On macOS and iOS mmap requests are tagged with tag 240 for easy identification with the vmmap tool.
-
-# Span breaking
-Super spans (spans a multiple > 1 of the span size) can be subdivided into smaller spans to fulfill a need to map a new span of memory. By default the allocator will greedily grab and break any larger span from the available caches before mapping new virtual memory. However, spans can currently not be glued together to form larger super spans again. Subspans can traverse the cache and be used by different threads individually.
-
-A span that is a subspan of a larger super span can be individually decommitted to reduce physical memory pressure when the span is evicted from caches and scheduled to be unmapped. The entire original super span will keep track of the subspans it is broken up into, and when the entire range is decommitted the super span will be unmapped. This allows platforms like Windows that require the entire virtual memory range that was mapped in a call to VirtualAlloc to be unmapped in one call to VirtualFree, while still decommitting individual pages in subspans (if the page size is smaller than the span size).
-
-If you use a custom memory map/unmap function you need to take this into account by looking at the `release` parameter given to the `memory_unmap` function. It is set to 0 for decommitting individual pages and the total super span byte size for finally releasing the entire super span memory range.
-
-# Memory fragmentation
-There is no memory fragmentation by the allocator in the sense that it will not leave unallocated and unusable "holes" in the memory pages by calls to allocate and free blocks of different sizes. This is due to the fact that the memory pages allocated for each size class is split up in perfectly aligned blocks which are not reused for a request of a different size. The block freed by a call to `rpfree` will always be immediately available for an allocation request within the same size class.
-
-However, there is memory fragmentation in the meaning that a request for x bytes followed by a request of y bytes where x and y are at least one size class different in size will return blocks that are at least one memory page apart in virtual address space. Only blocks of the same size will potentially be within the same memory page span.
-
-rpmalloc keeps an "active span" and free list for each size class. This leads to back-to-back allocations will most likely be served from within the same span of memory pages (unless the span runs out of free blocks). The rpmalloc implementation will also use any "holes" in memory pages in semi-filled spans before using a completely free span.
-
-# First class heaps
-rpmalloc provides a first class heap type with explicit heap control API. Heaps are maintained with calls to __rpmalloc_heap_acquire__ and __rpmalloc_heap_release__ and allocations/frees are done with __rpmalloc_heap_alloc__ and __rpmalloc_heap_free__. See the `rpmalloc.h` documentation for the full list of functions in the heap API. The main use case of explicit heap control is to scope allocations in a heap and release everything with a single call to __rpmalloc_heap_free_all__ without having to maintain ownership of memory blocks. Note that the heap API is not thread-safe, the caller must make sure that each heap is only used in a single thread at any given time.
-
-# Producer-consumer scenario
-Compared to the some other allocators, rpmalloc does not suffer as much from a producer-consumer thread scenario where one thread allocates memory blocks and another thread frees the blocks. In some allocators the free blocks need to traverse both the thread cache of the thread doing the free operations as well as the global cache before being reused in the allocating thread. In rpmalloc the freed blocks will be reused as soon as the allocating thread needs to get new spans from the thread cache. This enables faster release of completely freed memory pages as blocks in a memory page will not be aliased between different owning threads.
-
-# Best case scenarios
-Threads that keep ownership of allocated memory blocks within the thread and free the blocks from the same thread will have optimal performance.
-
-Threads that have allocation patterns where the difference in memory usage high and low water marks fit within the thread cache thresholds in the allocator will never touch the global cache except during thread init/fini and have optimal performance. Tweaking the cache limits can be done on a per-size-class basis.
-
-# Worst case scenarios
-Since each thread cache maps spans of memory pages per size class, a thread that allocates just a few blocks of each size class (16, 32, ...) for many size classes will never fill each bucket, and thus map a lot of memory pages while only using a small fraction of the mapped memory. However, the wasted memory will always be less than 4KiB (or the configured memory page size) per size class as each span is initialized one memory page at a time. The cache for free spans will be reused by all size classes.
-
-Threads that perform a lot of allocations and deallocations in a pattern that have a large difference in high and low water marks, and that difference is larger than the thread cache size, will put a lot of contention on the global cache. What will happen is the thread cache will overflow on each low water mark causing pages to be released to the global cache, then underflow on high water mark causing pages to be re-acquired from the global cache. This can be mitigated by changing the __MAX_SPAN_CACHE_DIVISOR__ define in the source code (at the cost of higher average memory overhead).
-
-# Caveats
-VirtualAlloc has an internal granularity of 64KiB. However, mmap lacks this granularity control, and the implementation instead oversizes the memory mapping with configured span size to be able to always return a memory area with the required alignment. Since the extra memory pages are never touched this will not result in extra committed physical memory pages, but rather only increase virtual memory address space.
-
-All entry points assume the passed values are valid, for example passing an invalid pointer to free would most likely result in a segmentation fault. __The library does not try to guard against errors!__.
-
-To support global scope data doing dynamic allocation/deallocation such as C++ objects with custom constructors and destructors, the call to __rpmalloc_finalize__ will not completely terminate the allocator but rather empty all caches and put the allocator in finalization mode. Once this call has been made, the allocator is no longer thread safe and expects all remaining calls to originate from global data destruction on main thread. Any spans or heaps becoming free during this phase will be immediately unmapped to allow correct teardown of the process or dynamic library without any leaks.
-
-# Other languages
-
-[Johan Andersson](https://github.com/repi) at Embark has created a Rust wrapper available at [rpmalloc-rs](https://github.com/EmbarkStudios/rpmalloc-rs)
-
-[Stas Denisov](https://github.com/nxrighthere) has created a C# wrapper available at [Rpmalloc-CSharp](https://github.com/nxrighthere/Rpmalloc-CSharp)
-
-# License
-
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
-
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org>
-
-
-You can also use this software under the MIT license if public domain is
-not recognized in your country
-
-
-The MIT License (MIT)
-
-Copyright (c) 2017 Mattias Jansson
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+# rpmalloc - General Purpose Memory Allocator
+This library provides a cross platform lock free thread caching 16-byte aligned memory allocator implemented in C.
+This is a fork of rpmalloc 1.4.5.
+
+Platforms currently supported:
+
+- Windows
+- MacOS
+- iOS
+- Linux
+- Android
+- Haiku
+
+The code should be easily portable to any platform with atomic operations and an mmap-style virtual memory management API. The API used to map/unmap memory pages can be configured in runtime to a custom implementation and mapping granularity/size.
+
+This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. Or, if you choose, you can use it under the MIT license.
+
+# Performance
+We believe rpmalloc is faster than most popular memory allocators like tcmalloc, hoard, ptmalloc3 and others without causing extra allocated memory overhead in the thread caches compared to these allocators. We also believe the implementation to be easier to read and modify compared to these allocators, as it is a single source file of ~3000 lines of C code. All allocations have a natural 16-byte alignment.
+
+Contained in a parallel repository is a benchmark utility that performs interleaved unaligned allocations and deallocations (both in-thread and cross-thread) in multiple threads. It measures number of memory operations performed per CPU second, as well as memory overhead by comparing the virtual memory mapped with the number of bytes requested in allocation calls. The setup of number of thread, cross-thread deallocation rate and allocation size limits is configured by command line arguments.
+
+https://github.com/mjansson/rpmalloc-benchmark
+
+Below is an example performance comparison chart of rpmalloc and other popular allocator implementations, with default configurations used.
+
+![Ubuntu 16.10, random [16, 8000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=301017877&format=image)
+
+The benchmark producing these numbers were run on an Ubuntu 16.10 machine with 8 logical cores (4 physical, HT). The actual numbers are not to be interpreted as absolute performance figures, but rather as relative comparisons between the different allocators. For additional benchmark results, see the [BENCHMARKS](BENCHMARKS.md) file.
+
+Configuration of the thread and global caches can be important depending on your use pattern. See [CACHE](CACHE.md) for a case study and some comments/guidelines.
+
+# Required functions
+
+Before calling any other function in the API, you __MUST__ call the initialization function, either __rpmalloc_initialize__ or __rpmalloc_initialize_config__, or you will get undefined behaviour when calling other rpmalloc entry point.
+
+Before terminating your use of the allocator, you __SHOULD__ call __rpmalloc_finalize__ in order to release caches and unmap virtual memory, as well as prepare the allocator for global scope cleanup at process exit or dynamic library unload depending on your use case.
+
+# Using
+The easiest way to use the library is simply adding __rpmalloc.[h|c]__ to your project and compile them along with your sources. This contains only the rpmalloc specific entry points and does not provide internal hooks to process and/or thread creation at the moment. You are required to call these functions from your own code in order to initialize and finalize the allocator in your process and threads:
+
+__rpmalloc_initialize__ : Call at process start to initialize the allocator
+
+__rpmalloc_initialize_config__ : Optional entry point to call at process start to initialize the allocator with a custom memory mapping backend, memory page size and mapping granularity.
+
+__rpmalloc_finalize__: Call at process exit to finalize the allocator
+
+__rpmalloc_thread_initialize__: Call at each thread start to initialize the thread local data for the allocator
+
+__rpmalloc_thread_finalize__: Call at each thread exit to finalize and release thread cache back to global cache
+
+__rpmalloc_config__: Get the current runtime configuration of the allocator
+
+Then simply use the __rpmalloc__/__rpfree__ and the other malloc style replacement functions. Remember all allocations are 16-byte aligned, so no need to call the explicit rpmemalign/rpaligned_alloc/rpposix_memalign functions unless you need greater alignment, they are simply wrappers to make it easier to replace in existing code.
+
+If you wish to override the standard library malloc family of functions and have automatic initialization/finalization of process and threads, define __ENABLE_OVERRIDE__ to non-zero which will include the `malloc.c` file in compilation of __rpmalloc.c__, and then rebuild the library or your project where you added the rpmalloc source. If you compile rpmalloc as a separate library you must make the linker use the override symbols from the library by referencing at least one symbol. The easiest way is to simply include `rpmalloc.h` in at least one source file and call `rpmalloc_linker_reference` somewhere - it's a dummy empty function. On Windows platforms and C++ overrides you have to `#include <rpnew.h>` in at least one source file and also manually handle the initialize/finalize of the process and all threads. The list of libc entry points replaced may not be complete, use libc/stdc++ replacement only as a convenience for testing the library on an existing code base, not a final solution.
+
+For explicit first class heaps, see the __rpmalloc_heap_*__ API under [first class heaps](#first-class-heaps) section, requiring __RPMALLOC_FIRST_CLASS_HEAPS__ tp be defined to 1.
+
+# Building
+To compile as a static library run the configure python script which generates a Ninja build script, then build using ninja. The ninja build produces two static libraries, one named `rpmalloc` and one named `rpmallocwrap`, where the latter includes the libc entry point overrides.
+
+The configure + ninja build also produces two shared object/dynamic libraries. The `rpmallocwrap` shared library can be used with LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject in a preexisting binary, replacing any malloc/free family of function calls. This is only implemented for Linux and macOS targets. The list of libc entry points replaced may not be complete, use preloading as a convenience for testing the library on an existing binary, not a final solution. The dynamic library also provides automatic init/fini of process and threads for all platforms.
+
+The latest stable release is available in the master branch. For latest development code, use the develop branch.
+
+# Cache configuration options
+Free memory pages are cached both per thread and in a global cache for all threads. The size of the thread caches is determined by an adaptive scheme where each cache is limited by a percentage of the maximum allocation count of the corresponding size class. The size of the global caches is determined by a multiple of the maximum of all thread caches. The factors controlling the cache sizes can be set by editing the individual defines in the `rpmalloc.c` source file for fine tuned control.
+
+__ENABLE_UNLIMITED_CACHE__: By default defined to 0, set to 1 to make all caches infinite, i.e never release spans to global cache unless thread finishes and never unmap memory pages back to the OS. Highest performance but largest memory overhead.
+
+__ENABLE_UNLIMITED_GLOBAL_CACHE__: By default defined to 0, set to 1 to make global caches infinite, i.e never unmap memory pages back to the OS.
+
+__ENABLE_UNLIMITED_THREAD_CACHE__: By default defined to 0, set to 1 to make thread caches infinite, i.e never release spans to global cache unless thread finishes.
+
+__ENABLE_GLOBAL_CACHE__: By default defined to 1, enables the global cache shared between all threads. Set to 0 to disable the global cache and directly unmap pages evicted from the thread cache.
+
+__ENABLE_THREAD_CACHE__: By default defined to 1, enables the per-thread cache. Set to 0 to disable the thread cache and directly unmap pages no longer in use (also disables the global cache).
+
+__ENABLE_ADAPTIVE_THREAD_CACHE__: Introduces a simple heuristics in the thread cache size, keeping 25% of the high water mark for each span count class.
+
+# Other configuration options
+Detailed statistics are available if __ENABLE_STATISTICS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. This will cause a slight overhead in runtime to collect statistics for each memory operation, and will also add 4 bytes overhead per allocation to track sizes.
+
+Integer safety checks on all calls are enabled if __ENABLE_VALIDATE_ARGS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. If enabled, size arguments to the global entry points are verified not to cause integer overflows in calculations.
+
+Asserts are enabled if __ENABLE_ASSERTS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`.
+
+To include __malloc.c__ in compilation and provide overrides of standard library malloc entry points define __ENABLE_OVERRIDE__ to 1. To enable automatic initialization of finalization of process and threads in order to preload the library into executables using standard library malloc, define __ENABLE_PRELOAD__ to 1.
+
+To enable the runtime configurable memory page and span sizes, define __RPMALLOC_CONFIGURABLE__ to 1. By default, memory page size is determined by system APIs and memory span size is set to 64KiB.
+
+To enable support for first class heaps, define __RPMALLOC_FIRST_CLASS_HEAPS__ to 1. By default, the first class heap API is disabled.
+
+# Huge pages
+The allocator has support for huge/large pages on Windows, Linux and MacOS. To enable it, pass a non-zero value in the config value `enable_huge_pages` when initializing the allocator with `rpmalloc_initialize_config`. If the system does not support huge pages it will be automatically disabled. You can query the status by looking at `enable_huge_pages` in the config returned from a call to `rpmalloc_config` after initialization is done.
+
+# Quick overview
+The allocator is similar in spirit to tcmalloc from the [Google Performance Toolkit](https://github.com/gperftools/gperftools). It uses separate heaps for each thread and partitions memory blocks according to a preconfigured set of size classes, up to 2MiB. Larger blocks are mapped and unmapped directly. Allocations for different size classes will be served from different set of memory pages, each "span" of pages is dedicated to one size class. Spans of pages can flow between threads when the thread cache overflows and are released to a global cache, or when the thread ends. Unlike tcmalloc, single blocks do not flow between threads, only entire spans of pages.
+
+# Implementation details
+The allocator is based on a fixed but configurable page alignment (defaults to 64KiB) and 16 byte block alignment, where all runs of memory pages (spans) are mapped to this alignment boundary. On Windows this is automatically guaranteed up to 64KiB by the VirtualAlloc granularity, and on mmap systems it is achieved by oversizing the mapping and aligning the returned virtual memory address to the required boundaries. By aligning to a fixed size the free operation can locate the header of the memory span without having to do a table lookup (as tcmalloc does) by simply masking out the low bits of the address (for 64KiB this would be the low 16 bits).
+
+Memory blocks are divided into three categories. For 64KiB span size/alignment the small blocks are [16, 1024] bytes, medium blocks (1024, 32256] bytes, and large blocks (32256, 2097120] bytes. The three categories are further divided in size classes. If the span size is changed, the small block classes remain but medium blocks go from (1024, span size] bytes.
+
+Small blocks have a size class granularity of 16 bytes each in 64 buckets. Medium blocks have a granularity of 512 bytes, 61 buckets (default). Large blocks have the same granularity as the configured span size (default 64KiB). All allocations are fitted to these size class boundaries (an allocation of 36 bytes will allocate a block of 48 bytes). Each small and medium size class has an associated span (meaning a contiguous set of memory pages) configuration describing how many pages the size class will allocate each time the cache is empty and a new allocation is requested.
+
+Spans for small and medium blocks are cached in four levels to avoid calls to map/unmap memory pages. The first level is a per thread single active span for each size class. The second level is a per thread list of partially free spans for each size class. The third level is a per thread list of free spans. The fourth level is a global list of free spans.
+
+Each span for a small and medium size class keeps track of how many blocks are allocated/free, as well as a list of which blocks that are free for allocation. To avoid locks, each span is completely owned by the allocating thread, and all cross-thread deallocations will be deferred to the owner thread through a separate free list per span.
+
+Large blocks, or super spans, are cached in two levels. The first level is a per thread list of free super spans. The second level is a global list of free super spans.
+
+# Memory mapping
+By default the allocator uses OS APIs to map virtual memory pages as needed, either `VirtualAlloc` on Windows or `mmap` on POSIX systems. If you want to use your own custom memory mapping provider you can use __rpmalloc_initialize_config__ and pass function pointers to map and unmap virtual memory. These function should reserve and free the requested number of bytes.
+
+The returned memory address from the memory map function MUST be aligned to the memory page size and the memory span size (which ever is larger), both of which is configurable. Either provide the page and span sizes during initialization using __rpmalloc_initialize_config__, or use __rpmalloc_config__ to find the required alignment which is equal to the maximum of page and span size. The span size MUST be a power of two in [4096, 262144] range, and be a multiple or divisor of the memory page size.
+
+Memory mapping requests are always done in multiples of the memory page size. You can specify a custom page size when initializing rpmalloc with __rpmalloc_initialize_config__, or pass 0 to let rpmalloc determine the system memory page size using OS APIs. The page size MUST be a power of two.
+
+To reduce system call overhead, memory spans are mapped in batches controlled by the `span_map_count` configuration variable (which defaults to the `DEFAULT_SPAN_MAP_COUNT` value if 0, which in turn is sized according to the cache configuration define, defaulting to 64). If the memory page size is larger than the span size, the number of spans to map in a single call will be adjusted to guarantee a multiple of the page size, and the spans will be kept mapped until the entire span range can be unmapped in one call (to avoid trying to unmap partial pages).
+
+On macOS and iOS mmap requests are tagged with tag 240 for easy identification with the vmmap tool.
+
+# Span breaking
+Super spans (spans a multiple > 1 of the span size) can be subdivided into smaller spans to fulfill a need to map a new span of memory. By default the allocator will greedily grab and break any larger span from the available caches before mapping new virtual memory. However, spans can currently not be glued together to form larger super spans again. Subspans can traverse the cache and be used by different threads individually.
+
+A span that is a subspan of a larger super span can be individually decommitted to reduce physical memory pressure when the span is evicted from caches and scheduled to be unmapped. The entire original super span will keep track of the subspans it is broken up into, and when the entire range is decommitted the super span will be unmapped. This allows platforms like Windows that require the entire virtual memory range that was mapped in a call to VirtualAlloc to be unmapped in one call to VirtualFree, while still decommitting individual pages in subspans (if the page size is smaller than the span size).
+
+If you use a custom memory map/unmap function you need to take this into account by looking at the `release` parameter given to the `memory_unmap` function. It is set to 0 for decommitting individual pages and the total super span byte size for finally releasing the entire super span memory range.
+
+# Memory fragmentation
+There is no memory fragmentation by the allocator in the sense that it will not leave unallocated and unusable "holes" in the memory pages by calls to allocate and free blocks of different sizes. This is due to the fact that the memory pages allocated for each size class is split up in perfectly aligned blocks which are not reused for a request of a different size. The block freed by a call to `rpfree` will always be immediately available for an allocation request within the same size class.
+
+However, there is memory fragmentation in the meaning that a request for x bytes followed by a request of y bytes where x and y are at least one size class different in size will return blocks that are at least one memory page apart in virtual address space. Only blocks of the same size will potentially be within the same memory page span.
+
+rpmalloc keeps an "active span" and free list for each size class. This leads to back-to-back allocations will most likely be served from within the same span of memory pages (unless the span runs out of free blocks). The rpmalloc implementation will also use any "holes" in memory pages in semi-filled spans before using a completely free span.
+
+# First class heaps
+rpmalloc provides a first class heap type with explicit heap control API. Heaps are maintained with calls to __rpmalloc_heap_acquire__ and __rpmalloc_heap_release__ and allocations/frees are done with __rpmalloc_heap_alloc__ and __rpmalloc_heap_free__. See the `rpmalloc.h` documentation for the full list of functions in the heap API. The main use case of explicit heap control is to scope allocations in a heap and release everything with a single call to __rpmalloc_heap_free_all__ without having to maintain ownership of memory blocks. Note that the heap API is not thread-safe, the caller must make sure that each heap is only used in a single thread at any given time.
+
+# Producer-consumer scenario
+Compared to the some other allocators, rpmalloc does not suffer as much from a producer-consumer thread scenario where one thread allocates memory blocks and another thread frees the blocks. In some allocators the free blocks need to traverse both the thread cache of the thread doing the free operations as well as the global cache before being reused in the allocating thread. In rpmalloc the freed blocks will be reused as soon as the allocating thread needs to get new spans from the thread cache. This enables faster release of completely freed memory pages as blocks in a memory page will not be aliased between different owning threads.
+
+# Best case scenarios
+Threads that keep ownership of allocated memory blocks within the thread and free the blocks from the same thread will have optimal performance.
+
+Threads that have allocation patterns where the difference in memory usage high and low water marks fit within the thread cache thresholds in the allocator will never touch the global cache except during thread init/fini and have optimal performance. Tweaking the cache limits can be done on a per-size-class basis.
+
+# Worst case scenarios
+Since each thread cache maps spans of memory pages per size class, a thread that allocates just a few blocks of each size class (16, 32, ...) for many size classes will never fill each bucket, and thus map a lot of memory pages while only using a small fraction of the mapped memory. However, the wasted memory will always be less than 4KiB (or the configured memory page size) per size class as each span is initialized one memory page at a time. The cache for free spans will be reused by all size classes.
+
+Threads that perform a lot of allocations and deallocations in a pattern that have a large difference in high and low water marks, and that difference is larger than the thread cache size, will put a lot of contention on the global cache. What will happen is the thread cache will overflow on each low water mark causing pages to be released to the global cache, then underflow on high water mark causing pages to be re-acquired from the global cache. This can be mitigated by changing the __MAX_SPAN_CACHE_DIVISOR__ define in the source code (at the cost of higher average memory overhead).
+
+# Caveats
+VirtualAlloc has an internal granularity of 64KiB. However, mmap lacks this granularity control, and the implementation instead oversizes the memory mapping with configured span size to be able to always return a memory area with the required alignment. Since the extra memory pages are never touched this will not result in extra committed physical memory pages, but rather only increase virtual memory address space.
+
+All entry points assume the passed values are valid, for example passing an invalid pointer to free would most likely result in a segmentation fault. __The library does not try to guard against errors!__.
+
+To support global scope data doing dynamic allocation/deallocation such as C++ objects with custom constructors and destructors, the call to __rpmalloc_finalize__ will not completely terminate the allocator but rather empty all caches and put the allocator in finalization mode. Once this call has been made, the allocator is no longer thread safe and expects all remaining calls to originate from global data destruction on main thread. Any spans or heaps becoming free during this phase will be immediately unmapped to allow correct teardown of the process or dynamic library without any leaks.
+
+# Other languages
+
+[Johan Andersson](https://github.com/repi) at Embark has created a Rust wrapper available at [rpmalloc-rs](https://github.com/EmbarkStudios/rpmalloc-rs)
+
+[Stas Denisov](https://github.com/nxrighthere) has created a C# wrapper available at [Rpmalloc-CSharp](https://github.com/nxrighthere/Rpmalloc-CSharp)
+
+# License
+
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
+
+
+You can also use this software under the MIT license if public domain is
+not recognized in your country
+
+
+The MIT License (MIT)
+
+Copyright (c) 2017 Mattias Jansson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/llvm/lib/Support/rpmalloc/malloc.c b/llvm/lib/Support/rpmalloc/malloc.c
index 59e13aab3ef7..3fcfe848250c 100644
--- a/llvm/lib/Support/rpmalloc/malloc.c
+++ b/llvm/lib/Support/rpmalloc/malloc.c
@@ -1,724 +1,724 @@
-//===------------------------ malloc.c ------------------*- C -*-=============//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This library provides a cross-platform lock free thread caching malloc
-// implementation in C11.
-//
-//
-// This file provides overrides for the standard library malloc entry points for
-// C and new/delete operators for C++ It also provides automatic
-// initialization/finalization of process and threads
-//
-//===----------------------------------------------------------------------===//
-
-#if defined(__TINYC__)
-#include <sys/types.h>
-#endif
-
-#ifndef ARCH_64BIT
-#if defined(__LLP64__) || defined(__LP64__) || defined(_WIN64)
-#define ARCH_64BIT 1
-_Static_assert(sizeof(size_t) == 8, "Data type size mismatch");
-_Static_assert(sizeof(void *) == 8, "Data type size mismatch");
-#else
-#define ARCH_64BIT 0
-_Static_assert(sizeof(size_t) == 4, "Data type size mismatch");
-_Static_assert(sizeof(void *) == 4, "Data type size mismatch");
-#endif
-#endif
-
-#if (defined(__GNUC__) || defined(__clang__))
-#pragma GCC visibility push(default)
-#endif
-
-#define USE_IMPLEMENT 1
-#define USE_INTERPOSE 0
-#define USE_ALIAS 0
-
-#if defined(__APPLE__)
-#undef USE_INTERPOSE
-#define USE_INTERPOSE 1
-
-typedef struct interpose_t {
-  void *new_func;
-  void *orig_func;
-} interpose_t;
-
-#define MAC_INTERPOSE_PAIR(newf, oldf) {(void *)newf, (void *)oldf}
-#define MAC_INTERPOSE_SINGLE(newf, oldf)                                       \
-  __attribute__((used)) static const interpose_t macinterpose##newf##oldf      \
-      __attribute__((section("__DATA, __interpose"))) =                        \
-          MAC_INTERPOSE_PAIR(newf, oldf)
-
-#endif
-
-#if !defined(_WIN32) && !defined(__APPLE__)
-#undef USE_IMPLEMENT
-#undef USE_ALIAS
-#define USE_IMPLEMENT 0
-#define USE_ALIAS 1
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(disable : 4100)
-#undef malloc
-#undef free
-#undef calloc
-#define RPMALLOC_RESTRICT __declspec(restrict)
-#else
-#define RPMALLOC_RESTRICT
-#endif
-
-#if ENABLE_OVERRIDE
-
-typedef struct rp_nothrow_t {
-  int __dummy;
-} rp_nothrow_t;
-
-#if USE_IMPLEMENT
-
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL malloc(size_t size) {
-  return rpmalloc(size);
-}
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL calloc(size_t count,
-                                                            size_t size) {
-  return rpcalloc(count, size);
-}
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL realloc(void *ptr,
-                                                             size_t size) {
-  return rprealloc(ptr, size);
-}
-extern inline void *RPMALLOC_CDECL reallocf(void *ptr, size_t size) {
-  return rprealloc(ptr, size);
-}
-extern inline void *RPMALLOC_CDECL aligned_alloc(size_t alignment,
-                                                 size_t size) {
-  return rpaligned_alloc(alignment, size);
-}
-extern inline void *RPMALLOC_CDECL memalign(size_t alignment, size_t size) {
-  return rpmemalign(alignment, size);
-}
-extern inline int RPMALLOC_CDECL posix_memalign(void **memptr, size_t alignment,
-                                                size_t size) {
-  return rpposix_memalign(memptr, alignment, size);
-}
-extern inline void RPMALLOC_CDECL free(void *ptr) { rpfree(ptr); }
-extern inline void RPMALLOC_CDECL cfree(void *ptr) { rpfree(ptr); }
-extern inline size_t RPMALLOC_CDECL malloc_usable_size(void *ptr) {
-  return rpmalloc_usable_size(ptr);
-}
-extern inline size_t RPMALLOC_CDECL malloc_size(void *ptr) {
-  return rpmalloc_usable_size(ptr);
-}
-
-#ifdef _WIN32
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _malloc_base(size_t size) {
-  return rpmalloc(size);
-}
-extern inline void RPMALLOC_CDECL _free_base(void *ptr) { rpfree(ptr); }
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _calloc_base(size_t count,
-                                                                  size_t size) {
-  return rpcalloc(count, size);
-}
-extern inline size_t RPMALLOC_CDECL _msize(void *ptr) {
-  return rpmalloc_usable_size(ptr);
-}
-extern inline size_t RPMALLOC_CDECL _msize_base(void *ptr) {
-  return rpmalloc_usable_size(ptr);
-}
-extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL
-_realloc_base(void *ptr, size_t size) {
-  return rprealloc(ptr, size);
-}
-#endif
-
-#ifdef _WIN32
-// For Windows, #include <rpnew.h> in one source file to get the C++ operator
-// overrides implemented in your module
-#else
-// Overload the C++ operators using the mangled names
-// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) operators
-// delete and delete[]
-#define RPDEFVIS __attribute__((visibility("default")))
-extern void _ZdlPv(void *p);
-void RPDEFVIS _ZdlPv(void *p) { rpfree(p); }
-extern void _ZdaPv(void *p);
-void RPDEFVIS _ZdaPv(void *p) { rpfree(p); }
-#if ARCH_64BIT
-// 64-bit operators new and new[], normal and aligned
-extern void *_Znwm(uint64_t size);
-void *RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); }
-extern void *_Znam(uint64_t size);
-void *RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); }
-extern void *_Znwmm(uint64_t size, uint64_t align);
-void *RPDEFVIS _Znwmm(uint64_t size, uint64_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_Znamm(uint64_t size, uint64_t align);
-void *RPDEFVIS _Znamm(uint64_t size, uint64_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnwmSt11align_val_t(uint64_t size, uint64_t align);
-void *RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnamSt11align_val_t(uint64_t size, uint64_t align);
-void *RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t);
-void *RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpmalloc(size);
-}
-extern void *_ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t);
-void *RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpmalloc(size);
-}
-extern void *_ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
-                                                rp_nothrow_t t);
-void *RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
-                                                  rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
-                                                rp_nothrow_t t);
-void *RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
-                                                  rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpaligned_alloc(align, size);
-}
-// 64-bit operators sized delete and delete[], normal and aligned
-extern void _ZdlPvm(void *p, uint64_t size);
-void RPDEFVIS _ZdlPvm(void *p, uint64_t size) {
-  rpfree(p);
-  (void)sizeof(size);
-}
-extern void _ZdaPvm(void *p, uint64_t size);
-void RPDEFVIS _ZdaPvm(void *p, uint64_t size) {
-  rpfree(p);
-  (void)sizeof(size);
-}
-extern void _ZdlPvSt11align_val_t(void *p, uint64_t align);
-void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(align);
-}
-extern void _ZdaPvSt11align_val_t(void *p, uint64_t align);
-void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(align);
-}
-extern void _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align);
-void RPDEFVIS _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(size);
-  (void)sizeof(align);
-}
-extern void _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align);
-void RPDEFVIS _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(size);
-  (void)sizeof(align);
-}
-#else
-// 32-bit operators new and new[], normal and aligned
-extern void *_Znwj(uint32_t size);
-void *RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
-extern void *_Znaj(uint32_t size);
-void *RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
-extern void *_Znwjj(uint32_t size, uint32_t align);
-void *RPDEFVIS _Znwjj(uint32_t size, uint32_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_Znajj(uint32_t size, uint32_t align);
-void *RPDEFVIS _Znajj(uint32_t size, uint32_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnwjSt11align_val_t(size_t size, size_t align);
-void *RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnajSt11align_val_t(size_t size, size_t align);
-void *RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) {
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t);
-void *RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpmalloc(size);
-}
-extern void *_ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t);
-void *RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpmalloc(size);
-}
-extern void *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
-                                                rp_nothrow_t t);
-void *RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
-                                                  rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpaligned_alloc(align, size);
-}
-extern void *_ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
-                                                rp_nothrow_t t);
-void *RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
-                                                  rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpaligned_alloc(align, size);
-}
-// 32-bit operators sized delete and delete[], normal and aligned
-extern void _ZdlPvj(void *p, uint64_t size);
-void RPDEFVIS _ZdlPvj(void *p, uint64_t size) {
-  rpfree(p);
-  (void)sizeof(size);
-}
-extern void _ZdaPvj(void *p, uint64_t size);
-void RPDEFVIS _ZdaPvj(void *p, uint64_t size) {
-  rpfree(p);
-  (void)sizeof(size);
-}
-extern void _ZdlPvSt11align_val_t(void *p, uint32_t align);
-void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t a) {
-  rpfree(p);
-  (void)sizeof(align);
-}
-extern void _ZdaPvSt11align_val_t(void *p, uint32_t align);
-void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t a) {
-  rpfree(p);
-  (void)sizeof(align);
-}
-extern void _ZdlPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
-void RPDEFVIS _ZdlPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(size);
-  (void)sizeof(a);
-}
-extern void _ZdaPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
-void RPDEFVIS _ZdaPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) {
-  rpfree(p);
-  (void)sizeof(size);
-  (void)sizeof(a);
-}
-#endif
-#endif
-#endif
-
-#if USE_INTERPOSE || USE_ALIAS
-
-static void *rpmalloc_nothrow(size_t size, rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpmalloc(size);
-}
-static void *rpaligned_alloc_reverse(size_t size, size_t align) {
-  return rpaligned_alloc(align, size);
-}
-static void *rpaligned_alloc_reverse_nothrow(size_t size, size_t align,
-                                             rp_nothrow_t t) {
-  (void)sizeof(t);
-  return rpaligned_alloc(align, size);
-}
-static void rpfree_size(void *p, size_t size) {
-  (void)sizeof(size);
-  rpfree(p);
-}
-static void rpfree_aligned(void *p, size_t align) {
-  (void)sizeof(align);
-  rpfree(p);
-}
-static void rpfree_size_aligned(void *p, size_t size, size_t align) {
-  (void)sizeof(size);
-  (void)sizeof(align);
-  rpfree(p);
-}
-
-#endif
-
-#if USE_INTERPOSE
-
-__attribute__((used)) static const interpose_t macinterpose_malloc[]
-    __attribute__((section("__DATA, __interpose"))) = {
-        // new and new[]
-        MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
-        MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm),
-        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t),
-        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow,
-                           _ZnwmSt11align_val_tRKSt9nothrow_t),
-        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow,
-                           _ZnamSt11align_val_tRKSt9nothrow_t),
-        // delete and delete[]
-        MAC_INTERPOSE_PAIR(rpfree, _ZdlPv), MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
-        MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm),
-        MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm),
-        MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t),
-        MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t),
-        MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t),
-        MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t),
-        // libc entry points
-        MAC_INTERPOSE_PAIR(rpmalloc, malloc),
-        MAC_INTERPOSE_PAIR(rpmalloc, calloc),
-        MAC_INTERPOSE_PAIR(rprealloc, realloc),
-        MAC_INTERPOSE_PAIR(rprealloc, reallocf),
-#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15
-        MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
-#endif
-        MAC_INTERPOSE_PAIR(rpmemalign, memalign),
-        MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
-        MAC_INTERPOSE_PAIR(rpfree, free), MAC_INTERPOSE_PAIR(rpfree, cfree),
-        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
-        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)};
-
-#endif
-
-#if USE_ALIAS
-
-#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));
-
-// Alias the C++ operators using the mangled names
-// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
-
-// operators delete and delete[]
-void _ZdlPv(void *p) RPALIAS(rpfree) void _ZdaPv(void *p) RPALIAS(rpfree)
-
-#if ARCH_64BIT
-    // 64-bit operators new and new[], normal and aligned
-    void *_Znwm(uint64_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
-        RPALIAS(rpmalloc) void *_Znam(uint64_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwmm(uint64_t size,
-                                                                 uint64_t align)
-        RPALIAS(rpaligned_alloc_reverse) void *_Znamm(uint64_t size,
-                                                      uint64_t align)
-            RPALIAS(rpaligned_alloc_reverse) void *_ZnwmSt11align_val_t(
-                size_t size, size_t align)
-                RPALIAS(rpaligned_alloc_reverse) void *_ZnamSt11align_val_t(
-                    size_t size, size_t align)
-                    RPALIAS(rpaligned_alloc_reverse) void *_ZnwmRKSt9nothrow_t(
-                        size_t size, rp_nothrow_t t)
-                        RPALIAS(rpmalloc_nothrow) void *_ZnamRKSt9nothrow_t(
-                            size_t size,
-                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
-                            *_ZnwmSt11align_val_tRKSt9nothrow_t(size_t size,
-                                                                size_t align,
-                                                                rp_nothrow_t t)
-                                RPALIAS(rpaligned_alloc_reverse_nothrow) void
-                                    *_ZnamSt11align_val_tRKSt9nothrow_t(
-                                        size_t size, size_t align,
-                                        rp_nothrow_t t)
-                                        RPALIAS(rpaligned_alloc_reverse_nothrow)
-    // 64-bit operators delete and delete[], sized and aligned
-    void _ZdlPvm(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvm(void *p,
-                                                                      size_t n)
-        RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a)
-            RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p,
-                                                               size_t a)
-                RPALIAS(rpfree_aligned) void _ZdlPvmSt11align_val_t(void *p,
-                                                                    size_t n,
-                                                                    size_t a)
-                    RPALIAS(rpfree_size_aligned) void _ZdaPvmSt11align_val_t(
-                        void *p, size_t n, size_t a)
-                        RPALIAS(rpfree_size_aligned)
-#else
-    // 32-bit operators new and new[], normal and aligned
-    void *_Znwj(uint32_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
-        RPALIAS(rpmalloc) void *_Znaj(uint32_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwjj(uint32_t size,
-                                                                 uint32_t align)
-        RPALIAS(rpaligned_alloc_reverse) void *_Znajj(uint32_t size,
-                                                      uint32_t align)
-            RPALIAS(rpaligned_alloc_reverse) void *_ZnwjSt11align_val_t(
-                size_t size, size_t align)
-                RPALIAS(rpaligned_alloc_reverse) void *_ZnajSt11align_val_t(
-                    size_t size, size_t align)
-                    RPALIAS(rpaligned_alloc_reverse) void *_ZnwjRKSt9nothrow_t(
-                        size_t size, rp_nothrow_t t)
-                        RPALIAS(rpmalloc_nothrow) void *_ZnajRKSt9nothrow_t(
-                            size_t size,
-                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
-                            *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size,
-                                                                size_t align,
-                                                                rp_nothrow_t t)
-                                RPALIAS(rpaligned_alloc_reverse_nothrow) void
-                                    *_ZnajSt11align_val_tRKSt9nothrow_t(
-                                        size_t size, size_t align,
-                                        rp_nothrow_t t)
-                                        RPALIAS(rpaligned_alloc_reverse_nothrow)
-    // 32-bit operators delete and delete[], sized and aligned
-    void _ZdlPvj(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvj(void *p,
-                                                                      size_t n)
-        RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a)
-            RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p,
-                                                               size_t a)
-                RPALIAS(rpfree_aligned) void _ZdlPvjSt11align_val_t(void *p,
-                                                                    size_t n,
-                                                                    size_t a)
-                    RPALIAS(rpfree_size_aligned) void _ZdaPvjSt11align_val_t(
-                        void *p, size_t n, size_t a)
-                        RPALIAS(rpfree_size_aligned)
-#endif
-
-                            void *malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
-        RPALIAS(rpmalloc) void *calloc(size_t count, size_t size)
-            RPALIAS(rpcalloc) void *realloc(void *ptr, size_t size)
-                RPALIAS(rprealloc) void *reallocf(void *ptr, size_t size)
-                    RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
-        RPALIAS(rprealloc) void *aligned_alloc(size_t alignment, size_t size)
-            RPALIAS(rpaligned_alloc) void *memalign(
-                size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
-        RPALIAS(rpmemalign) int posix_memalign(void **memptr, size_t alignment,
-                                               size_t size)
-            RPALIAS(rpposix_memalign) void free(void *ptr)
-                RPALIAS(rpfree) void cfree(void *ptr) RPALIAS(rpfree)
-#if defined(__ANDROID__) || defined(__FreeBSD__)
-                    size_t
-    malloc_usable_size(const void *ptr) RPALIAS(rpmalloc_usable_size)
-#else
-                    size_t
-    malloc_usable_size(void *ptr) RPALIAS(rpmalloc_usable_size)
-#endif
-        size_t malloc_size(void *ptr) RPALIAS(rpmalloc_usable_size)
-
-#endif
-
-            static inline size_t _rpmalloc_page_size(void) {
-  return _memory_page_size;
-}
-
-extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size);
-
-extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size) {
-  size_t total;
-#if ENABLE_VALIDATE_ARGS
-#ifdef _MSC_VER
-  int err = SizeTMult(count, size, &total);
-  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#else
-  int err = __builtin_umull_overflow(count, size, &total);
-  if (err || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-#else
-  total = count * size;
-#endif
-  return realloc(ptr, total);
-}
-
-extern inline void *RPMALLOC_CDECL valloc(size_t size) {
-  get_thread_heap();
-  return rpaligned_alloc(_rpmalloc_page_size(), size);
-}
-
-extern inline void *RPMALLOC_CDECL pvalloc(size_t size) {
-  get_thread_heap();
-  const size_t page_size = _rpmalloc_page_size();
-  const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size;
-#if ENABLE_VALIDATE_ARGS
-  if (aligned_size < size) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  return rpaligned_alloc(_rpmalloc_page_size(), aligned_size);
-}
-
-#endif // ENABLE_OVERRIDE
-
-#if ENABLE_PRELOAD
-
-#ifdef _WIN32
-
-#if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK
-
-extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance,
-                                                 DWORD reason, LPVOID reserved);
-
-extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance,
-                                                 DWORD reason,
-                                                 LPVOID reserved) {
-  (void)sizeof(reserved);
-  (void)sizeof(instance);
-  if (reason == DLL_PROCESS_ATTACH)
-    rpmalloc_initialize();
-  else if (reason == DLL_PROCESS_DETACH)
-    rpmalloc_finalize();
-  else if (reason == DLL_THREAD_ATTACH)
-    rpmalloc_thread_initialize();
-  else if (reason == DLL_THREAD_DETACH)
-    rpmalloc_thread_finalize(1);
-  return TRUE;
-}
-
-// end BUILD_DYNAMIC_LINK
-#else
-
-extern void _global_rpmalloc_init(void) {
-  rpmalloc_set_main_thread();
-  rpmalloc_initialize();
-}
-
-#if defined(__clang__) || defined(__GNUC__)
-
-static void __attribute__((constructor)) initializer(void) {
-  _global_rpmalloc_init();
-}
-
-#elif defined(_MSC_VER)
-
-static int _global_rpmalloc_xib(void) {
-  _global_rpmalloc_init();
-  return 0;
-}
-
-#pragma section(".CRT$XIB", read)
-__declspec(allocate(".CRT$XIB")) void (*_rpmalloc_module_init)(void) =
-    _global_rpmalloc_xib;
-#if defined(_M_IX86) || defined(__i386__)
-#pragma comment(linker, "/include:"                                            \
-                        "__rpmalloc_module_init")
-#else
-#pragma comment(linker, "/include:"                                            \
-                        "_rpmalloc_module_init")
-#endif
-
-#endif
-
-// end !BUILD_DYNAMIC_LINK
-#endif
-
-#else
-
-#include <pthread.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-extern void rpmalloc_set_main_thread(void);
-
-static pthread_key_t destructor_key;
-
-static void thread_destructor(void *);
-
-static void __attribute__((constructor)) initializer(void) {
-  rpmalloc_set_main_thread();
-  rpmalloc_initialize();
-  pthread_key_create(&destructor_key, thread_destructor);
-}
-
-static void __attribute__((destructor)) finalizer(void) { rpmalloc_finalize(); }
-
-typedef struct {
-  void *(*real_start)(void *);
-  void *real_arg;
-} thread_starter_arg;
-
-static void *thread_starter(void *argptr) {
-  thread_starter_arg *arg = argptr;
-  void *(*real_start)(void *) = arg->real_start;
-  void *real_arg = arg->real_arg;
-  rpmalloc_thread_initialize();
-  rpfree(argptr);
-  pthread_setspecific(destructor_key, (void *)1);
-  return (*real_start)(real_arg);
-}
-
-static void thread_destructor(void *value) {
-  (void)sizeof(value);
-  rpmalloc_thread_finalize(1);
-}
-
-#ifdef __APPLE__
-
-static int pthread_create_proxy(pthread_t *thread, const pthread_attr_t *attr,
-                                void *(*start_routine)(void *), void *arg) {
-  rpmalloc_initialize();
-  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
-  starter_arg->real_start = start_routine;
-  starter_arg->real_arg = arg;
-  return pthread_create(thread, attr, thread_starter, starter_arg);
-}
-
-MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create);
-
-#else
-
-#include <dlfcn.h>
-
-int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
-                   void *(*start_routine)(void *), void *arg) {
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) ||      \
-    defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) ||     \
-    defined(__HAIKU__)
-  char fname[] = "pthread_create";
-#else
-  char fname[] = "_pthread_create";
-#endif
-  void *real_pthread_create = dlsym(RTLD_NEXT, fname);
-  rpmalloc_thread_initialize();
-  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
-  starter_arg->real_start = start_routine;
-  starter_arg->real_arg = arg;
-  return (*(int (*)(pthread_t *, const pthread_attr_t *, void *(*)(void *),
-                    void *))real_pthread_create)(thread, attr, thread_starter,
-                                                 starter_arg);
-}
-
-#endif
-
-#endif
-
-#endif
-
-#if ENABLE_OVERRIDE
-
-#if defined(__GLIBC__) && defined(__linux__)
-
-void *__libc_malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
-        RPALIAS(rpmalloc) void *__libc_calloc(size_t count, size_t size)
-            RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2)
-                RPALIAS(rpcalloc) void *__libc_realloc(void *p, size_t size)
-                    RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2) RPALIAS(rprealloc) void __libc_free(void *p)
-        RPALIAS(rpfree) void __libc_cfree(void *p)
-            RPALIAS(rpfree) void *__libc_memalign(size_t align, size_t size)
-                RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
-        RPALIAS(rpmemalign) int __posix_memalign(void **p, size_t align,
-                                                 size_t size)
-            RPALIAS(rpposix_memalign)
-
-                extern void *__libc_valloc(size_t size);
-extern void *__libc_pvalloc(size_t size);
-
-void *__libc_valloc(size_t size) { return valloc(size); }
-
-void *__libc_pvalloc(size_t size) { return pvalloc(size); }
-
-#endif
-
-#endif
-
-#if (defined(__GNUC__) || defined(__clang__))
-#pragma GCC visibility pop
-#endif
+//===------------------------ malloc.c ------------------*- C -*-=============//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This library provides a cross-platform lock free thread caching malloc
+// implementation in C11.
+//
+//
+// This file provides overrides for the standard library malloc entry points for
+// C and new/delete operators for C++ It also provides automatic
+// initialization/finalization of process and threads
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__TINYC__)
+#include <sys/types.h>
+#endif
+
+#ifndef ARCH_64BIT
+#if defined(__LLP64__) || defined(__LP64__) || defined(_WIN64)
+#define ARCH_64BIT 1
+_Static_assert(sizeof(size_t) == 8, "Data type size mismatch");
+_Static_assert(sizeof(void *) == 8, "Data type size mismatch");
+#else
+#define ARCH_64BIT 0
+_Static_assert(sizeof(size_t) == 4, "Data type size mismatch");
+_Static_assert(sizeof(void *) == 4, "Data type size mismatch");
+#endif
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility push(default)
+#endif
+
+#define USE_IMPLEMENT 1
+#define USE_INTERPOSE 0
+#define USE_ALIAS 0
+
+#if defined(__APPLE__)
+#undef USE_INTERPOSE
+#define USE_INTERPOSE 1
+
+typedef struct interpose_t {
+  void *new_func;
+  void *orig_func;
+} interpose_t;
+
+#define MAC_INTERPOSE_PAIR(newf, oldf) {(void *)newf, (void *)oldf}
+#define MAC_INTERPOSE_SINGLE(newf, oldf)                                       \
+  __attribute__((used)) static const interpose_t macinterpose##newf##oldf      \
+      __attribute__((section("__DATA, __interpose"))) =                        \
+          MAC_INTERPOSE_PAIR(newf, oldf)
+
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#undef USE_IMPLEMENT
+#undef USE_ALIAS
+#define USE_IMPLEMENT 0
+#define USE_ALIAS 1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#undef malloc
+#undef free
+#undef calloc
+#define RPMALLOC_RESTRICT __declspec(restrict)
+#else
+#define RPMALLOC_RESTRICT
+#endif
+
+#if ENABLE_OVERRIDE
+
+typedef struct rp_nothrow_t {
+  int __dummy;
+} rp_nothrow_t;
+
+#if USE_IMPLEMENT
+
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL malloc(size_t size) {
+  return rpmalloc(size);
+}
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL calloc(size_t count,
+                                                            size_t size) {
+  return rpcalloc(count, size);
+}
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL realloc(void *ptr,
+                                                             size_t size) {
+  return rprealloc(ptr, size);
+}
+extern inline void *RPMALLOC_CDECL reallocf(void *ptr, size_t size) {
+  return rprealloc(ptr, size);
+}
+extern inline void *RPMALLOC_CDECL aligned_alloc(size_t alignment,
+                                                 size_t size) {
+  return rpaligned_alloc(alignment, size);
+}
+extern inline void *RPMALLOC_CDECL memalign(size_t alignment, size_t size) {
+  return rpmemalign(alignment, size);
+}
+extern inline int RPMALLOC_CDECL posix_memalign(void **memptr, size_t alignment,
+                                                size_t size) {
+  return rpposix_memalign(memptr, alignment, size);
+}
+extern inline void RPMALLOC_CDECL free(void *ptr) { rpfree(ptr); }
+extern inline void RPMALLOC_CDECL cfree(void *ptr) { rpfree(ptr); }
+extern inline size_t RPMALLOC_CDECL malloc_usable_size(void *ptr) {
+  return rpmalloc_usable_size(ptr);
+}
+extern inline size_t RPMALLOC_CDECL malloc_size(void *ptr) {
+  return rpmalloc_usable_size(ptr);
+}
+
+#ifdef _WIN32
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _malloc_base(size_t size) {
+  return rpmalloc(size);
+}
+extern inline void RPMALLOC_CDECL _free_base(void *ptr) { rpfree(ptr); }
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _calloc_base(size_t count,
+                                                                  size_t size) {
+  return rpcalloc(count, size);
+}
+extern inline size_t RPMALLOC_CDECL _msize(void *ptr) {
+  return rpmalloc_usable_size(ptr);
+}
+extern inline size_t RPMALLOC_CDECL _msize_base(void *ptr) {
+  return rpmalloc_usable_size(ptr);
+}
+extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL
+_realloc_base(void *ptr, size_t size) {
+  return rprealloc(ptr, size);
+}
+#endif
+
+#ifdef _WIN32
+// For Windows, #include <rpnew.h> in one source file to get the C++ operator
+// overrides implemented in your module
+#else
+// Overload the C++ operators using the mangled names
+// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) operators
+// delete and delete[]
+#define RPDEFVIS __attribute__((visibility("default")))
+extern void _ZdlPv(void *p);
+void RPDEFVIS _ZdlPv(void *p) { rpfree(p); }
+extern void _ZdaPv(void *p);
+void RPDEFVIS _ZdaPv(void *p) { rpfree(p); }
+#if ARCH_64BIT
+// 64-bit operators new and new[], normal and aligned
+extern void *_Znwm(uint64_t size);
+void *RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); }
+extern void *_Znam(uint64_t size);
+void *RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); }
+extern void *_Znwmm(uint64_t size, uint64_t align);
+void *RPDEFVIS _Znwmm(uint64_t size, uint64_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_Znamm(uint64_t size, uint64_t align);
+void *RPDEFVIS _Znamm(uint64_t size, uint64_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwmSt11align_val_t(uint64_t size, uint64_t align);
+void *RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnamSt11align_val_t(uint64_t size, uint64_t align);
+void *RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+// 64-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvm(void *p, uint64_t size);
+void RPDEFVIS _ZdlPvm(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdaPvm(void *p, uint64_t size);
+void RPDEFVIS _ZdaPvm(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdlPvSt11align_val_t(void *p, uint64_t align);
+void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdaPvSt11align_val_t(void *p, uint64_t align);
+void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align);
+void RPDEFVIS _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(align);
+}
+extern void _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align);
+void RPDEFVIS _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(align);
+}
+#else
+// 32-bit operators new and new[], normal and aligned
+extern void *_Znwj(uint32_t size);
+void *RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
+extern void *_Znaj(uint32_t size);
+void *RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
+extern void *_Znwjj(uint32_t size, uint32_t align);
+void *RPDEFVIS _Znwjj(uint32_t size, uint32_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_Znajj(uint32_t size, uint32_t align);
+void *RPDEFVIS _Znajj(uint32_t size, uint32_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwjSt11align_val_t(size_t size, size_t align);
+void *RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnajSt11align_val_t(size_t size, size_t align);
+void *RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+// 32-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvj(void *p, uint64_t size);
+void RPDEFVIS _ZdlPvj(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdaPvj(void *p, uint64_t size);
+void RPDEFVIS _ZdaPvj(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdlPvSt11align_val_t(void *p, uint32_t align);
+void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t a) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdaPvSt11align_val_t(void *p, uint32_t align);
+void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t a) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdlPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
+void RPDEFVIS _ZdlPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(a);
+}
+extern void _ZdaPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
+void RPDEFVIS _ZdaPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(a);
+}
+#endif
+#endif
+#endif
+
+#if USE_INTERPOSE || USE_ALIAS
+
+static void *rpmalloc_nothrow(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+static void *rpaligned_alloc_reverse(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+static void *rpaligned_alloc_reverse_nothrow(size_t size, size_t align,
+                                             rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+static void rpfree_size(void *p, size_t size) {
+  (void)sizeof(size);
+  rpfree(p);
+}
+static void rpfree_aligned(void *p, size_t align) {
+  (void)sizeof(align);
+  rpfree(p);
+}
+static void rpfree_size_aligned(void *p, size_t size, size_t align) {
+  (void)sizeof(size);
+  (void)sizeof(align);
+  rpfree(p);
+}
+
+#endif
+
+#if USE_INTERPOSE
+
+__attribute__((used)) static const interpose_t macinterpose_malloc[]
+    __attribute__((section("__DATA, __interpose"))) = {
+        // new and new[]
+        MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
+        MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm),
+        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t),
+        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow,
+                           _ZnwmSt11align_val_tRKSt9nothrow_t),
+        MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow,
+                           _ZnamSt11align_val_tRKSt9nothrow_t),
+        // delete and delete[]
+        MAC_INTERPOSE_PAIR(rpfree, _ZdlPv), MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
+        MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm),
+        MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm),
+        MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t),
+        MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t),
+        MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t),
+        MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t),
+        // libc entry points
+        MAC_INTERPOSE_PAIR(rpmalloc, malloc),
+        MAC_INTERPOSE_PAIR(rpmalloc, calloc),
+        MAC_INTERPOSE_PAIR(rprealloc, realloc),
+        MAC_INTERPOSE_PAIR(rprealloc, reallocf),
+#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15
+        MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
+#endif
+        MAC_INTERPOSE_PAIR(rpmemalign, memalign),
+        MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
+        MAC_INTERPOSE_PAIR(rpfree, free), MAC_INTERPOSE_PAIR(rpfree, cfree),
+        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
+        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)};
+
+#endif
+
+#if USE_ALIAS
+
+#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));
+
+// Alias the C++ operators using the mangled names
+// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
+
+// operators delete and delete[]
+void _ZdlPv(void *p) RPALIAS(rpfree) void _ZdaPv(void *p) RPALIAS(rpfree)
+
+#if ARCH_64BIT
+    // 64-bit operators new and new[], normal and aligned
+    void *_Znwm(uint64_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
+        RPALIAS(rpmalloc) void *_Znam(uint64_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwmm(uint64_t size,
+                                                                 uint64_t align)
+        RPALIAS(rpaligned_alloc_reverse) void *_Znamm(uint64_t size,
+                                                      uint64_t align)
+            RPALIAS(rpaligned_alloc_reverse) void *_ZnwmSt11align_val_t(
+                size_t size, size_t align)
+                RPALIAS(rpaligned_alloc_reverse) void *_ZnamSt11align_val_t(
+                    size_t size, size_t align)
+                    RPALIAS(rpaligned_alloc_reverse) void *_ZnwmRKSt9nothrow_t(
+                        size_t size, rp_nothrow_t t)
+                        RPALIAS(rpmalloc_nothrow) void *_ZnamRKSt9nothrow_t(
+                            size_t size,
+                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
+                            *_ZnwmSt11align_val_tRKSt9nothrow_t(size_t size,
+                                                                size_t align,
+                                                                rp_nothrow_t t)
+                                RPALIAS(rpaligned_alloc_reverse_nothrow) void
+                                    *_ZnamSt11align_val_tRKSt9nothrow_t(
+                                        size_t size, size_t align,
+                                        rp_nothrow_t t)
+                                        RPALIAS(rpaligned_alloc_reverse_nothrow)
+    // 64-bit operators delete and delete[], sized and aligned
+    void _ZdlPvm(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvm(void *p,
+                                                                      size_t n)
+        RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a)
+            RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p,
+                                                               size_t a)
+                RPALIAS(rpfree_aligned) void _ZdlPvmSt11align_val_t(void *p,
+                                                                    size_t n,
+                                                                    size_t a)
+                    RPALIAS(rpfree_size_aligned) void _ZdaPvmSt11align_val_t(
+                        void *p, size_t n, size_t a)
+                        RPALIAS(rpfree_size_aligned)
+#else
+    // 32-bit operators new and new[], normal and aligned
+    void *_Znwj(uint32_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
+        RPALIAS(rpmalloc) void *_Znaj(uint32_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwjj(uint32_t size,
+                                                                 uint32_t align)
+        RPALIAS(rpaligned_alloc_reverse) void *_Znajj(uint32_t size,
+                                                      uint32_t align)
+            RPALIAS(rpaligned_alloc_reverse) void *_ZnwjSt11align_val_t(
+                size_t size, size_t align)
+                RPALIAS(rpaligned_alloc_reverse) void *_ZnajSt11align_val_t(
+                    size_t size, size_t align)
+                    RPALIAS(rpaligned_alloc_reverse) void *_ZnwjRKSt9nothrow_t(
+                        size_t size, rp_nothrow_t t)
+                        RPALIAS(rpmalloc_nothrow) void *_ZnajRKSt9nothrow_t(
+                            size_t size,
+                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
+                            *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size,
+                                                                size_t align,
+                                                                rp_nothrow_t t)
+                                RPALIAS(rpaligned_alloc_reverse_nothrow) void
+                                    *_ZnajSt11align_val_tRKSt9nothrow_t(
+                                        size_t size, size_t align,
+                                        rp_nothrow_t t)
+                                        RPALIAS(rpaligned_alloc_reverse_nothrow)
+    // 32-bit operators delete and delete[], sized and aligned
+    void _ZdlPvj(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvj(void *p,
+                                                                      size_t n)
+        RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a)
+            RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p,
+                                                               size_t a)
+                RPALIAS(rpfree_aligned) void _ZdlPvjSt11align_val_t(void *p,
+                                                                    size_t n,
+                                                                    size_t a)
+                    RPALIAS(rpfree_size_aligned) void _ZdaPvjSt11align_val_t(
+                        void *p, size_t n, size_t a)
+                        RPALIAS(rpfree_size_aligned)
+#endif
+
+                            void *malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
+        RPALIAS(rpmalloc) void *calloc(size_t count, size_t size)
+            RPALIAS(rpcalloc) void *realloc(void *ptr, size_t size)
+                RPALIAS(rprealloc) void *reallocf(void *ptr, size_t size)
+                    RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
+        RPALIAS(rprealloc) void *aligned_alloc(size_t alignment, size_t size)
+            RPALIAS(rpaligned_alloc) void *memalign(
+                size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
+        RPALIAS(rpmemalign) int posix_memalign(void **memptr, size_t alignment,
+                                               size_t size)
+            RPALIAS(rpposix_memalign) void free(void *ptr)
+                RPALIAS(rpfree) void cfree(void *ptr) RPALIAS(rpfree)
+#if defined(__ANDROID__) || defined(__FreeBSD__)
+                    size_t
+    malloc_usable_size(const void *ptr) RPALIAS(rpmalloc_usable_size)
+#else
+                    size_t
+    malloc_usable_size(void *ptr) RPALIAS(rpmalloc_usable_size)
+#endif
+        size_t malloc_size(void *ptr) RPALIAS(rpmalloc_usable_size)
+
+#endif
+
+            static inline size_t _rpmalloc_page_size(void) {
+  return _memory_page_size;
+}
+
+extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size);
+
+extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size) {
+  size_t total;
+#if ENABLE_VALIDATE_ARGS
+#ifdef _MSC_VER
+  int err = SizeTMult(count, size, &total);
+  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#else
+  int err = __builtin_umull_overflow(count, size, &total);
+  if (err || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+#else
+  total = count * size;
+#endif
+  return realloc(ptr, total);
+}
+
+extern inline void *RPMALLOC_CDECL valloc(size_t size) {
+  get_thread_heap();
+  return rpaligned_alloc(_rpmalloc_page_size(), size);
+}
+
+extern inline void *RPMALLOC_CDECL pvalloc(size_t size) {
+  get_thread_heap();
+  const size_t page_size = _rpmalloc_page_size();
+  const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size;
+#if ENABLE_VALIDATE_ARGS
+  if (aligned_size < size) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  return rpaligned_alloc(_rpmalloc_page_size(), aligned_size);
+}
+
+#endif // ENABLE_OVERRIDE
+
+#if ENABLE_PRELOAD
+
+#ifdef _WIN32
+
+#if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK
+
+extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance,
+                                                 DWORD reason, LPVOID reserved);
+
+extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance,
+                                                 DWORD reason,
+                                                 LPVOID reserved) {
+  (void)sizeof(reserved);
+  (void)sizeof(instance);
+  if (reason == DLL_PROCESS_ATTACH)
+    rpmalloc_initialize();
+  else if (reason == DLL_PROCESS_DETACH)
+    rpmalloc_finalize();
+  else if (reason == DLL_THREAD_ATTACH)
+    rpmalloc_thread_initialize();
+  else if (reason == DLL_THREAD_DETACH)
+    rpmalloc_thread_finalize(1);
+  return TRUE;
+}
+
+// end BUILD_DYNAMIC_LINK
+#else
+
+extern void _global_rpmalloc_init(void) {
+  rpmalloc_set_main_thread();
+  rpmalloc_initialize();
+}
+
+#if defined(__clang__) || defined(__GNUC__)
+
+static void __attribute__((constructor)) initializer(void) {
+  _global_rpmalloc_init();
+}
+
+#elif defined(_MSC_VER)
+
+static int _global_rpmalloc_xib(void) {
+  _global_rpmalloc_init();
+  return 0;
+}
+
+#pragma section(".CRT$XIB", read)
+__declspec(allocate(".CRT$XIB")) void (*_rpmalloc_module_init)(void) =
+    _global_rpmalloc_xib;
+#if defined(_M_IX86) || defined(__i386__)
+#pragma comment(linker, "/include:"                                            \
+                        "__rpmalloc_module_init")
+#else
+#pragma comment(linker, "/include:"                                            \
+                        "_rpmalloc_module_init")
+#endif
+
+#endif
+
+// end !BUILD_DYNAMIC_LINK
+#endif
+
+#else
+
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+extern void rpmalloc_set_main_thread(void);
+
+static pthread_key_t destructor_key;
+
+static void thread_destructor(void *);
+
+static void __attribute__((constructor)) initializer(void) {
+  rpmalloc_set_main_thread();
+  rpmalloc_initialize();
+  pthread_key_create(&destructor_key, thread_destructor);
+}
+
+static void __attribute__((destructor)) finalizer(void) { rpmalloc_finalize(); }
+
+typedef struct {
+  void *(*real_start)(void *);
+  void *real_arg;
+} thread_starter_arg;
+
+static void *thread_starter(void *argptr) {
+  thread_starter_arg *arg = argptr;
+  void *(*real_start)(void *) = arg->real_start;
+  void *real_arg = arg->real_arg;
+  rpmalloc_thread_initialize();
+  rpfree(argptr);
+  pthread_setspecific(destructor_key, (void *)1);
+  return (*real_start)(real_arg);
+}
+
+static void thread_destructor(void *value) {
+  (void)sizeof(value);
+  rpmalloc_thread_finalize(1);
+}
+
+#ifdef __APPLE__
+
+static int pthread_create_proxy(pthread_t *thread, const pthread_attr_t *attr,
+                                void *(*start_routine)(void *), void *arg) {
+  rpmalloc_initialize();
+  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
+  starter_arg->real_start = start_routine;
+  starter_arg->real_arg = arg;
+  return pthread_create(thread, attr, thread_starter, starter_arg);
+}
+
+MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create);
+
+#else
+
+#include <dlfcn.h>
+
+int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+                   void *(*start_routine)(void *), void *arg) {
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) ||      \
+    defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) ||     \
+    defined(__HAIKU__)
+  char fname[] = "pthread_create";
+#else
+  char fname[] = "_pthread_create";
+#endif
+  void *real_pthread_create = dlsym(RTLD_NEXT, fname);
+  rpmalloc_thread_initialize();
+  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
+  starter_arg->real_start = start_routine;
+  starter_arg->real_arg = arg;
+  return (*(int (*)(pthread_t *, const pthread_attr_t *, void *(*)(void *),
+                    void *))real_pthread_create)(thread, attr, thread_starter,
+                                                 starter_arg);
+}
+
+#endif
+
+#endif
+
+#endif
+
+#if ENABLE_OVERRIDE
+
+#if defined(__GLIBC__) && defined(__linux__)
+
+void *__libc_malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(1)
+        RPALIAS(rpmalloc) void *__libc_calloc(size_t count, size_t size)
+            RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2)
+                RPALIAS(rpcalloc) void *__libc_realloc(void *p, size_t size)
+                    RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2) RPALIAS(rprealloc) void __libc_free(void *p)
+        RPALIAS(rpfree) void __libc_cfree(void *p)
+            RPALIAS(rpfree) void *__libc_memalign(size_t align, size_t size)
+                RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2)
+        RPALIAS(rpmemalign) int __posix_memalign(void **p, size_t align,
+                                                 size_t size)
+            RPALIAS(rpposix_memalign)
+
+                extern void *__libc_valloc(size_t size);
+extern void *__libc_pvalloc(size_t size);
+
+void *__libc_valloc(size_t size) { return valloc(size); }
+
+void *__libc_pvalloc(size_t size) { return pvalloc(size); }
+
+#endif
+
+#endif
+
+#if (defined(__GNUC__) || defined(__clang__))
+#pragma GCC visibility pop
+#endif
diff --git a/llvm/lib/Support/rpmalloc/rpmalloc.c b/llvm/lib/Support/rpmalloc/rpmalloc.c
index 0976ec8ae6af..a06d3cdb5b52 100644
--- a/llvm/lib/Support/rpmalloc/rpmalloc.c
+++ b/llvm/lib/Support/rpmalloc/rpmalloc.c
@@ -1,3992 +1,3992 @@
-//===---------------------- rpmalloc.c ------------------*- C -*-=============//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This library provides a cross-platform lock free thread caching malloc
-// implementation in C11.
-//
-//===----------------------------------------------------------------------===//
-
-#include "rpmalloc.h"
-
-////////////
-///
-/// Build time configurable limits
-///
-//////
-
-#if defined(__clang__)
-#pragma clang diagnostic ignored "-Wunused-macros"
-#pragma clang diagnostic ignored "-Wunused-function"
-#if __has_warning("-Wreserved-identifier")
-#pragma clang diagnostic ignored "-Wreserved-identifier"
-#endif
-#if __has_warning("-Wstatic-in-inline")
-#pragma clang diagnostic ignored "-Wstatic-in-inline"
-#endif
-#elif defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wunused-macros"
-#pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-
-#if !defined(__has_builtin)
-#define __has_builtin(b) 0
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-
-#if __has_builtin(__builtin_memcpy_inline)
-#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
-#else
-#define _rpmalloc_memcpy_const(x, y, s)                                        \
-  do {                                                                         \
-    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0),       \
-                   "len must be a constant integer");                          \
-    memcpy(x, y, s);                                                           \
-  } while (0)
-#endif
-
-#if __has_builtin(__builtin_memset_inline)
-#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
-#else
-#define _rpmalloc_memset_const(x, y, s)                                        \
-  do {                                                                         \
-    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0),       \
-                   "len must be a constant integer");                          \
-    memset(x, y, s);                                                           \
-  } while (0)
-#endif
-#else
-#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
-#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
-#endif
-
-#if __has_builtin(__builtin_assume)
-#define rpmalloc_assume(cond) __builtin_assume(cond)
-#elif defined(__GNUC__)
-#define rpmalloc_assume(cond)                                                  \
-  do {                                                                         \
-    if (!__builtin_expect(cond, 0))                                            \
-      __builtin_unreachable();                                                 \
-  } while (0)
-#elif defined(_MSC_VER)
-#define rpmalloc_assume(cond) __assume(cond)
-#else
-#define rpmalloc_assume(cond) 0
-#endif
-
-#ifndef HEAP_ARRAY_SIZE
-//! Size of heap hashmap
-#define HEAP_ARRAY_SIZE 47
-#endif
-#ifndef ENABLE_THREAD_CACHE
-//! Enable per-thread cache
-#define ENABLE_THREAD_CACHE 1
-#endif
-#ifndef ENABLE_GLOBAL_CACHE
-//! Enable global cache shared between all threads, requires thread cache
-#define ENABLE_GLOBAL_CACHE 1
-#endif
-#ifndef ENABLE_VALIDATE_ARGS
-//! Enable validation of args to public entry points
-#define ENABLE_VALIDATE_ARGS 0
-#endif
-#ifndef ENABLE_STATISTICS
-//! Enable statistics collection
-#define ENABLE_STATISTICS 0
-#endif
-#ifndef ENABLE_ASSERTS
-//! Enable asserts
-#define ENABLE_ASSERTS 0
-#endif
-#ifndef ENABLE_OVERRIDE
-//! Override standard library malloc/free and new/delete entry points
-#define ENABLE_OVERRIDE 0
-#endif
-#ifndef ENABLE_PRELOAD
-//! Support preloading
-#define ENABLE_PRELOAD 0
-#endif
-#ifndef DISABLE_UNMAP
-//! Disable unmapping memory pages (also enables unlimited cache)
-#define DISABLE_UNMAP 0
-#endif
-#ifndef ENABLE_UNLIMITED_CACHE
-//! Enable unlimited global cache (no unmapping until finalization)
-#define ENABLE_UNLIMITED_CACHE 0
-#endif
-#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
-//! Enable adaptive thread cache size based on use heuristics
-#define ENABLE_ADAPTIVE_THREAD_CACHE 0
-#endif
-#ifndef DEFAULT_SPAN_MAP_COUNT
-//! Default number of spans to map in call to map more virtual memory (default
-//! values yield 4MiB here)
-#define DEFAULT_SPAN_MAP_COUNT 64
-#endif
-#ifndef GLOBAL_CACHE_MULTIPLIER
-//! Multiplier for global cache
-#define GLOBAL_CACHE_MULTIPLIER 8
-#endif
-
-#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
-#error Must use global cache if unmap is disabled
-#endif
-
-#if DISABLE_UNMAP
-#undef ENABLE_UNLIMITED_CACHE
-#define ENABLE_UNLIMITED_CACHE 1
-#endif
-
-#if !ENABLE_GLOBAL_CACHE
-#undef ENABLE_UNLIMITED_CACHE
-#define ENABLE_UNLIMITED_CACHE 0
-#endif
-
-#if !ENABLE_THREAD_CACHE
-#undef ENABLE_ADAPTIVE_THREAD_CACHE
-#define ENABLE_ADAPTIVE_THREAD_CACHE 0
-#endif
-
-#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
-#define PLATFORM_WINDOWS 1
-#define PLATFORM_POSIX 0
-#else
-#define PLATFORM_WINDOWS 0
-#define PLATFORM_POSIX 1
-#endif
-
-/// Platform and arch specifics
-#if defined(_MSC_VER) && !defined(__clang__)
-#pragma warning(disable : 5105)
-#ifndef FORCEINLINE
-#define FORCEINLINE inline __forceinline
-#endif
-#define _Static_assert static_assert
-#else
-#ifndef FORCEINLINE
-#define FORCEINLINE inline __attribute__((__always_inline__))
-#endif
-#endif
-#if PLATFORM_WINDOWS
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#include <windows.h>
-#if ENABLE_VALIDATE_ARGS
-#include <intsafe.h>
-#endif
-#else
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
-#if defined(__linux__) || defined(__ANDROID__)
-#include <sys/prctl.h>
-#if !defined(PR_SET_VMA)
-#define PR_SET_VMA 0x53564d41
-#define PR_SET_VMA_ANON_NAME 0
-#endif
-#endif
-#if defined(__APPLE__)
-#include <TargetConditionals.h>
-#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
-#include <mach/mach_vm.h>
-#include <mach/vm_statistics.h>
-#endif
-#include <pthread.h>
-#endif
-#if defined(__HAIKU__) || defined(__TINYC__)
-#include <pthread.h>
-#endif
-#endif
-
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-#include <fibersapi.h>
-static DWORD fls_key;
-#endif
-
-#if PLATFORM_POSIX
-#include <sched.h>
-#include <sys/mman.h>
-#ifdef __FreeBSD__
-#include <sys/sysctl.h>
-#define MAP_HUGETLB MAP_ALIGNED_SUPER
-#ifndef PROT_MAX
-#define PROT_MAX(f) 0
-#endif
-#else
-#define PROT_MAX(f) 0
-#endif
-#ifdef __sun
-extern int madvise(caddr_t, size_t, int);
-#endif
-#ifndef MAP_UNINITIALIZED
-#define MAP_UNINITIALIZED 0
-#endif
-#endif
-#include <errno.h>
-
-#if ENABLE_ASSERTS
-#undef NDEBUG
-#if defined(_MSC_VER) && !defined(_DEBUG)
-#define _DEBUG
-#endif
-#include <assert.h>
-#define RPMALLOC_TOSTRING_M(x) #x
-#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
-#define rpmalloc_assert(truth, message)                                        \
-  do {                                                                         \
-    if (!(truth)) {                                                            \
-      if (_memory_config.error_callback) {                                     \
-        _memory_config.error_callback(message " (" RPMALLOC_TOSTRING(          \
-            truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__));          \
-      } else {                                                                 \
-        assert((truth) && message);                                            \
-      }                                                                        \
-    }                                                                          \
-  } while (0)
-#else
-#define rpmalloc_assert(truth, message)                                        \
-  do {                                                                         \
-  } while (0)
-#endif
-#if ENABLE_STATISTICS
-#include <stdio.h>
-#endif
-
-//////
-///
-/// Atomic access abstraction (since MSVC does not do C11 yet)
-///
-//////
-
-#if defined(_MSC_VER) && !defined(__clang__)
-
-typedef volatile long atomic32_t;
-typedef volatile long long atomic64_t;
-typedef volatile void *atomicptr_t;
-
-static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
-static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
-  *dst = val;
-}
-static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
-  return (int32_t)InterlockedIncrement(val);
-}
-static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
-  return (int32_t)InterlockedDecrement(val);
-}
-static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
-  return (int32_t)InterlockedExchangeAdd(val, add) + add;
-}
-static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
-                                            int32_t ref) {
-  return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0;
-}
-static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
-  *dst = val;
-}
-static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
-static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
-  return (int64_t)InterlockedExchangeAdd64(val, add) + add;
-}
-static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
-  return (void *)*src;
-}
-static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
-  *dst = val;
-}
-static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
-  *dst = val;
-}
-static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
-                                                     void *val) {
-  return (void *)InterlockedExchangePointer((void *volatile *)dst, val);
-}
-static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
-  return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) ==
-          ref)
-             ? 1
-             : 0;
-}
-
-#define EXPECTED(x) (x)
-#define UNEXPECTED(x) (x)
-
-#else
-
-#include <stdatomic.h>
-
-typedef volatile _Atomic(int32_t) atomic32_t;
-typedef volatile _Atomic(int64_t) atomic64_t;
-typedef volatile _Atomic(void *) atomicptr_t;
-
-static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
-  return atomic_load_explicit(src, memory_order_relaxed);
-}
-static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
-  atomic_store_explicit(dst, val, memory_order_relaxed);
-}
-static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
-  return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1;
-}
-static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
-  return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1;
-}
-static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
-  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
-}
-static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
-                                            int32_t ref) {
-  return atomic_compare_exchange_weak_explicit(
-      dst, &ref, val, memory_order_acquire, memory_order_relaxed);
-}
-static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
-  atomic_store_explicit(dst, val, memory_order_release);
-}
-static FORCEINLINE int64_t atomic_load64(atomic64_t *val) {
-  return atomic_load_explicit(val, memory_order_relaxed);
-}
-static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
-  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
-}
-static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
-  return atomic_load_explicit(src, memory_order_relaxed);
-}
-static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
-  atomic_store_explicit(dst, val, memory_order_relaxed);
-}
-static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
-  atomic_store_explicit(dst, val, memory_order_release);
-}
-static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
-                                                     void *val) {
-  return atomic_exchange_explicit(dst, val, memory_order_acquire);
-}
-static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
-  return atomic_compare_exchange_weak_explicit(
-      dst, &ref, val, memory_order_relaxed, memory_order_relaxed);
-}
-
-#define EXPECTED(x) __builtin_expect((x), 1)
-#define UNEXPECTED(x) __builtin_expect((x), 0)
-
-#endif
-
-////////////
-///
-/// Statistics related functions (evaluate to nothing when statistics not
-/// enabled)
-///
-//////
-
-#if ENABLE_STATISTICS
-#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
-#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
-#define _rpmalloc_stat_add(counter, value)                                     \
-  atomic_add32(counter, (int32_t)(value))
-#define _rpmalloc_stat_add64(counter, value)                                   \
-  atomic_add64(counter, (int64_t)(value))
-#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
-  do {                                                                         \
-    int32_t _cur_count = atomic_add32(counter, (int32_t)(value));              \
-    if (_cur_count > (peak))                                                   \
-      peak = _cur_count;                                                       \
-  } while (0)
-#define _rpmalloc_stat_sub(counter, value)                                     \
-  atomic_add32(counter, -(int32_t)(value))
-#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
-  do {                                                                         \
-    int32_t alloc_current =                                                    \
-        atomic_incr32(&heap->size_class_use[class_idx].alloc_current);         \
-    if (alloc_current > heap->size_class_use[class_idx].alloc_peak)            \
-      heap->size_class_use[class_idx].alloc_peak = alloc_current;              \
-    atomic_incr32(&heap->size_class_use[class_idx].alloc_total);               \
-  } while (0)
-#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
-  do {                                                                         \
-    atomic_decr32(&heap->size_class_use[class_idx].alloc_current);             \
-    atomic_incr32(&heap->size_class_use[class_idx].free_total);                \
-  } while (0)
-#else
-#define _rpmalloc_stat_inc(counter)                                            \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_dec(counter)                                            \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_add(counter, value)                                     \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_add64(counter, value)                                   \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_sub(counter, value)                                     \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
-  do {                                                                         \
-  } while (0)
-#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
-  do {                                                                         \
-  } while (0)
-#endif
-
-///
-/// Preconfigured limits and sizes
-///
-
-//! Granularity of a small allocation block (must be power of two)
-#define SMALL_GRANULARITY 16
-//! Small granularity shift count
-#define SMALL_GRANULARITY_SHIFT 4
-//! Number of small block size classes
-#define SMALL_CLASS_COUNT 65
-//! Maximum size of a small block
-#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
-//! Granularity of a medium allocation block
-#define MEDIUM_GRANULARITY 512
-//! Medium granularity shift count
-#define MEDIUM_GRANULARITY_SHIFT 9
-//! Number of medium block size classes
-#define MEDIUM_CLASS_COUNT 61
-//! Total number of small + medium size classes
-#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
-//! Number of large block size classes
-#define LARGE_CLASS_COUNT 63
-//! Maximum size of a medium block
-#define MEDIUM_SIZE_LIMIT                                                      \
-  (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
-//! Maximum size of a large block
-#define LARGE_SIZE_LIMIT                                                       \
-  ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
-//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power
-//! of two)
-#define SPAN_HEADER_SIZE 128
-//! Number of spans in thread cache
-#define MAX_THREAD_SPAN_CACHE 400
-//! Number of spans to transfer between thread and global cache
-#define THREAD_SPAN_CACHE_TRANSFER 64
-//! Number of spans in thread cache for large spans (must be greater than
-//! LARGE_CLASS_COUNT / 2)
-#define MAX_THREAD_SPAN_LARGE_CACHE 100
-//! Number of spans to transfer between thread and global cache for large spans
-#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
-
-_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0,
-               "Small granularity must be power of two");
-_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0,
-               "Span header size must be power of two");
-
-#if ENABLE_VALIDATE_ARGS
-//! Maximum allocation size to avoid integer overflow
-#undef MAX_ALLOC_SIZE
-#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size)
-#endif
-
-#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs))
-#define pointer_diff(first, second)                                            \
-  (ptrdiff_t)((const char *)(first) - (const char *)(second))
-
-#define INVALID_POINTER ((void *)((uintptr_t) - 1))
-
-#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
-#define SIZE_CLASS_HUGE ((uint32_t) - 1)
-
-////////////
-///
-/// Data types
-///
-//////
-
-//! A memory heap, per thread
-typedef struct heap_t heap_t;
-//! Span of memory pages
-typedef struct span_t span_t;
-//! Span list
-typedef struct span_list_t span_list_t;
-//! Span active data
-typedef struct span_active_t span_active_t;
-//! Size class definition
-typedef struct size_class_t size_class_t;
-//! Global cache
-typedef struct global_cache_t global_cache_t;
-
-//! Flag indicating span is the first (master) span of a split superspan
-#define SPAN_FLAG_MASTER 1U
-//! Flag indicating span is a secondary (sub) span of a split superspan
-#define SPAN_FLAG_SUBSPAN 2U
-//! Flag indicating span has blocks with increased alignment
-#define SPAN_FLAG_ALIGNED_BLOCKS 4U
-//! Flag indicating an unmapped master span
-#define SPAN_FLAG_UNMAPPED_MASTER 8U
-
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
-struct span_use_t {
-  //! Current number of spans used (actually used, not in cache)
-  atomic32_t current;
-  //! High water mark of spans used
-  atomic32_t high;
-#if ENABLE_STATISTICS
-  //! Number of spans in deferred list
-  atomic32_t spans_deferred;
-  //! Number of spans transitioned to global cache
-  atomic32_t spans_to_global;
-  //! Number of spans transitioned from global cache
-  atomic32_t spans_from_global;
-  //! Number of spans transitioned to thread cache
-  atomic32_t spans_to_cache;
-  //! Number of spans transitioned from thread cache
-  atomic32_t spans_from_cache;
-  //! Number of spans transitioned to reserved state
-  atomic32_t spans_to_reserved;
-  //! Number of spans transitioned from reserved state
-  atomic32_t spans_from_reserved;
-  //! Number of raw memory map calls
-  atomic32_t spans_map_calls;
-#endif
-};
-typedef struct span_use_t span_use_t;
-#endif
-
-#if ENABLE_STATISTICS
-struct size_class_use_t {
-  //! Current number of allocations
-  atomic32_t alloc_current;
-  //! Peak number of allocations
-  int32_t alloc_peak;
-  //! Total number of allocations
-  atomic32_t alloc_total;
-  //! Total number of frees
-  atomic32_t free_total;
-  //! Number of spans in use
-  atomic32_t spans_current;
-  //! Number of spans transitioned to cache
-  int32_t spans_peak;
-  //! Number of spans transitioned to cache
-  atomic32_t spans_to_cache;
-  //! Number of spans transitioned from cache
-  atomic32_t spans_from_cache;
-  //! Number of spans transitioned from reserved state
-  atomic32_t spans_from_reserved;
-  //! Number of spans mapped
-  atomic32_t spans_map_calls;
-  int32_t unused;
-};
-typedef struct size_class_use_t size_class_use_t;
-#endif
-
-// A span can either represent a single span of memory pages with size declared
-// by span_map_count configuration variable, or a set of spans in a continuous
-// region, a super span. Any reference to the term "span" usually refers to both
-// a single span or a super span. A super span can further be divided into
-// multiple spans (or this, super spans), where the first (super)span is the
-// master and subsequent (super)spans are subspans. The master span keeps track
-// of how many subspans that are still alive and mapped in virtual memory, and
-// once all subspans and master have been unmapped the entire superspan region
-// is released and unmapped (on Windows for example, the entire superspan range
-// has to be released in the same call to release the virtual memory range, but
-// individual subranges can be decommitted individually to reduce physical
-// memory use).
-struct span_t {
-  //! Free list
-  void *free_list;
-  //! Total block count of size class
-  uint32_t block_count;
-  //! Size class
-  uint32_t size_class;
-  //! Index of last block initialized in free list
-  uint32_t free_list_limit;
-  //! Number of used blocks remaining when in partial state
-  uint32_t used_count;
-  //! Deferred free list
-  atomicptr_t free_list_deferred;
-  //! Size of deferred free list, or list of spans when part of a cache list
-  uint32_t list_size;
-  //! Size of a block
-  uint32_t block_size;
-  //! Flags and counters
-  uint32_t flags;
-  //! Number of spans
-  uint32_t span_count;
-  //! Total span counter for master spans
-  uint32_t total_spans;
-  //! Offset from master span for subspans
-  uint32_t offset_from_master;
-  //! Remaining span counter, for master spans
-  atomic32_t remaining_spans;
-  //! Alignment offset
-  uint32_t align_offset;
-  //! Owning heap
-  heap_t *heap;
-  //! Next span
-  span_t *next;
-  //! Previous span
-  span_t *prev;
-};
-_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
-
-struct span_cache_t {
-  size_t count;
-  span_t *span[MAX_THREAD_SPAN_CACHE];
-};
-typedef struct span_cache_t span_cache_t;
-
-struct span_large_cache_t {
-  size_t count;
-  span_t *span[MAX_THREAD_SPAN_LARGE_CACHE];
-};
-typedef struct span_large_cache_t span_large_cache_t;
-
-struct heap_size_class_t {
-  //! Free list of active span
-  void *free_list;
-  //! Double linked list of partially used spans with free blocks.
-  //  Previous span pointer in head points to tail span of list.
-  span_t *partial_span;
-  //! Early level cache of fully free spans
-  span_t *cache;
-};
-typedef struct heap_size_class_t heap_size_class_t;
-
-// Control structure for a heap, either a thread heap or a first class heap if
-// enabled
-struct heap_t {
-  //! Owning thread ID
-  uintptr_t owner_thread;
-  //! Free lists for each size class
-  heap_size_class_t size_class[SIZE_CLASS_COUNT];
-#if ENABLE_THREAD_CACHE
-  //! Arrays of fully freed spans, single span
-  span_cache_t span_cache;
-#endif
-  //! List of deferred free spans (single linked list)
-  atomicptr_t span_free_deferred;
-  //! Number of full spans
-  size_t full_span_count;
-  //! Mapped but unused spans
-  span_t *span_reserve;
-  //! Master span for mapped but unused spans
-  span_t *span_reserve_master;
-  //! Number of mapped but unused spans
-  uint32_t spans_reserved;
-  //! Child count
-  atomic32_t child_count;
-  //! Next heap in id list
-  heap_t *next_heap;
-  //! Next heap in orphan list
-  heap_t *next_orphan;
-  //! Heap ID
-  int32_t id;
-  //! Finalization state flag
-  int finalize;
-  //! Master heap owning the memory pages
-  heap_t *master_heap;
-#if ENABLE_THREAD_CACHE
-  //! Arrays of fully freed spans, large spans with > 1 span count
-  span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
-#endif
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  //! Double linked list of fully utilized spans with free blocks for each size
-  //! class.
-  //  Previous span pointer in head points to tail span of list.
-  span_t *full_span[SIZE_CLASS_COUNT];
-  //! Double linked list of large and huge spans allocated by this heap
-  span_t *large_huge_span;
-#endif
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
-  //! Current and high water mark of spans used per span count
-  span_use_t span_use[LARGE_CLASS_COUNT];
-#endif
-#if ENABLE_STATISTICS
-  //! Allocation stats per size class
-  size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
-  //! Number of bytes transitioned thread -> global
-  atomic64_t thread_to_global;
-  //! Number of bytes transitioned global -> thread
-  atomic64_t global_to_thread;
-#endif
-};
-
-// Size class for defining a block size bucket
-struct size_class_t {
-  //! Size of blocks in this class
-  uint32_t block_size;
-  //! Number of blocks in each chunk
-  uint16_t block_count;
-  //! Class index this class is merged with
-  uint16_t class_idx;
-};
-_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
-
-struct global_cache_t {
-  //! Cache lock
-  atomic32_t lock;
-  //! Cache count
-  uint32_t count;
-#if ENABLE_STATISTICS
-  //! Insert count
-  size_t insert_count;
-  //! Extract count
-  size_t extract_count;
-#endif
-  //! Cached spans
-  span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
-  //! Unlimited cache overflow
-  span_t *overflow;
-};
-
-////////////
-///
-/// Global data
-///
-//////
-
-//! Default span size (64KiB)
-#define _memory_default_span_size (64 * 1024)
-#define _memory_default_span_size_shift 16
-#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
-
-//! Initialized flag
-static int _rpmalloc_initialized;
-//! Main thread ID
-static uintptr_t _rpmalloc_main_thread_id;
-//! Configuration
-static rpmalloc_config_t _memory_config;
-//! Memory page size
-static size_t _memory_page_size;
-//! Shift to divide by page size
-static size_t _memory_page_size_shift;
-//! Granularity at which memory pages are mapped by OS
-static size_t _memory_map_granularity;
-#if RPMALLOC_CONFIGURABLE
-//! Size of a span of memory pages
-static size_t _memory_span_size;
-//! Shift to divide by span size
-static size_t _memory_span_size_shift;
-//! Mask to get to start of a memory span
-static uintptr_t _memory_span_mask;
-#else
-//! Hardwired span size
-#define _memory_span_size _memory_default_span_size
-#define _memory_span_size_shift _memory_default_span_size_shift
-#define _memory_span_mask _memory_default_span_mask
-#endif
-//! Number of spans to map in each map call
-static size_t _memory_span_map_count;
-//! Number of spans to keep reserved in each heap
-static size_t _memory_heap_reserve_count;
-//! Global size classes
-static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
-//! Run-time size limit of medium blocks
-static size_t _memory_medium_size_limit;
-//! Heap ID counter
-static atomic32_t _memory_heap_id;
-//! Huge page support
-static int _memory_huge_pages;
-#if ENABLE_GLOBAL_CACHE
-//! Global span cache
-static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
-#endif
-//! Global reserved spans
-static span_t *_memory_global_reserve;
-//! Global reserved count
-static size_t _memory_global_reserve_count;
-//! Global reserved master
-static span_t *_memory_global_reserve_master;
-//! All heaps
-static heap_t *_memory_heaps[HEAP_ARRAY_SIZE];
-//! Used to restrict access to mapping memory for huge pages
-static atomic32_t _memory_global_lock;
-//! Orphaned heaps
-static heap_t *_memory_orphan_heaps;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-//! Orphaned heaps (first class heaps)
-static heap_t *_memory_first_class_orphan_heaps;
-#endif
-#if ENABLE_STATISTICS
-//! Allocations counter
-static atomic64_t _allocation_counter;
-//! Deallocations counter
-static atomic64_t _deallocation_counter;
-//! Active heap count
-static atomic32_t _memory_active_heaps;
-//! Number of currently mapped memory pages
-static atomic32_t _mapped_pages;
-//! Peak number of concurrently mapped memory pages
-static int32_t _mapped_pages_peak;
-//! Number of mapped master spans
-static atomic32_t _master_spans;
-//! Number of unmapped dangling master spans
-static atomic32_t _unmapped_master_spans;
-//! Running counter of total number of mapped memory pages since start
-static atomic32_t _mapped_total;
-//! Running counter of total number of unmapped memory pages since start
-static atomic32_t _unmapped_total;
-//! Number of currently mapped memory pages in OS calls
-static atomic32_t _mapped_pages_os;
-//! Number of currently allocated pages in huge allocations
-static atomic32_t _huge_pages_current;
-//! Peak number of currently allocated pages in huge allocations
-static int32_t _huge_pages_peak;
-#endif
-
-////////////
-///
-/// Thread local heap and ID
-///
-//////
-
-//! Current thread heap
-#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
-    defined(__TINYC__)
-static pthread_key_t _memory_thread_heap;
-#else
-#ifdef _MSC_VER
-#define _Thread_local __declspec(thread)
-#define TLS_MODEL
-#else
-#ifndef __HAIKU__
-#define TLS_MODEL __attribute__((tls_model("initial-exec")))
-#else
-#define TLS_MODEL
-#endif
-#if !defined(__clang__) && defined(__GNUC__)
-#define _Thread_local __thread
-#endif
-#endif
-static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
-#endif
-
-static inline heap_t *get_thread_heap_raw(void) {
-#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
-  return pthread_getspecific(_memory_thread_heap);
-#else
-  return _memory_thread_heap;
-#endif
-}
-
-//! Get the current thread heap
-static inline heap_t *get_thread_heap(void) {
-  heap_t *heap = get_thread_heap_raw();
-#if ENABLE_PRELOAD
-  if (EXPECTED(heap != 0))
-    return heap;
-  rpmalloc_initialize();
-  return get_thread_heap_raw();
-#else
-  return heap;
-#endif
-}
-
-//! Fast thread ID
-static inline uintptr_t get_thread_id(void) {
-#if defined(_WIN32)
-  return (uintptr_t)((void *)NtCurrentTeb());
-#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
-  uintptr_t tid;
-#if defined(__i386__)
-  __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
-#elif defined(__x86_64__)
-#if defined(__MACH__)
-  __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
-#else
-  __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
-#endif
-#elif defined(__arm__)
-  __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
-#elif defined(__aarch64__)
-#if defined(__MACH__)
-  // tpidr_el0 likely unused, always return 0 on iOS
-  __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
-#else
-  __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
-#endif
-#else
-#error This platform needs implementation of get_thread_id()
-#endif
-  return tid;
-#else
-#error This platform needs implementation of get_thread_id()
-#endif
-}
-
-//! Set the current thread heap
-static void set_thread_heap(heap_t *heap) {
-#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
-    defined(__TINYC__)
-  pthread_setspecific(_memory_thread_heap, heap);
-#else
-  _memory_thread_heap = heap;
-#endif
-  if (heap)
-    heap->owner_thread = get_thread_id();
-}
-
-//! Set main thread ID
-extern void rpmalloc_set_main_thread(void);
-
-void rpmalloc_set_main_thread(void) {
-  _rpmalloc_main_thread_id = get_thread_id();
-}
-
-static void _rpmalloc_spin(void) {
-#if defined(_MSC_VER)
-#if defined(_M_ARM64)
-  __yield();
-#else
-  _mm_pause();
-#endif
-#elif defined(__x86_64__) || defined(__i386__)
-  __asm__ volatile("pause" ::: "memory");
-#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
-  __asm__ volatile("yield" ::: "memory");
-#elif defined(__powerpc__) || defined(__powerpc64__)
-  // No idea if ever been compiled in such archs but ... as precaution
-  __asm__ volatile("or 27,27,27");
-#elif defined(__sparc__)
-  __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
-#else
-  struct timespec ts = {0};
-  nanosleep(&ts, 0);
-#endif
-}
-
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-static void NTAPI _rpmalloc_thread_destructor(void *value) {
-#if ENABLE_OVERRIDE
-  // If this is called on main thread it means rpmalloc_finalize
-  // has not been called and shutdown is forced (through _exit) or unclean
-  if (get_thread_id() == _rpmalloc_main_thread_id)
-    return;
-#endif
-  if (value)
-    rpmalloc_thread_finalize(1);
-}
-#endif
-
-////////////
-///
-/// Low level memory map/unmap
-///
-//////
-
-static void _rpmalloc_set_name(void *address, size_t size) {
-#if defined(__linux__) || defined(__ANDROID__)
-  const char *name = _memory_huge_pages ? _memory_config.huge_page_name
-                                        : _memory_config.page_name;
-  if (address == MAP_FAILED || !name)
-    return;
-  // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
-  // (e.g. invalid name) it is a no-op basically.
-  (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size,
-              (uintptr_t)name);
-#else
-  (void)sizeof(size);
-  (void)sizeof(address);
-#endif
-}
-
-//! Map more virtual memory
-//  size is number of bytes to map
-//  offset receives the offset in bytes from start of mapped region
-//  returns address to start of mapped region to use
-static void *_rpmalloc_mmap(size_t size, size_t *offset) {
-  rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
-  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
-  void *address = _memory_config.memory_map(size, offset);
-  if (EXPECTED(address != 0)) {
-    _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
-                            _mapped_pages_peak);
-    _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
-  }
-  return address;
-}
-
-//! Unmap virtual memory
-//  address is the memory address to unmap, as returned from _memory_map
-//  size is the number of bytes to unmap, which might be less than full region
-//  for a partial unmap offset is the offset in bytes to the actual mapped
-//  region, as set by _memory_map release is set to 0 for partial unmap, or size
-//  of entire range for a full unmap
-static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
-                            size_t release) {
-  rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
-  rpmalloc_assert(!release || (release >= _memory_page_size),
-                  "Invalid unmap size");
-  if (release) {
-    rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
-    _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
-    _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
-  }
-  _memory_config.memory_unmap(address, size, offset, release);
-}
-
-//! Default implementation to map new pages to virtual memory
-static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
-  // Either size is a heap (a single page) or a (multiple) span - we only need
-  // to align spans, and only if larger than map granularity
-  size_t padding = ((size >= _memory_span_size) &&
-                    (_memory_span_size > _memory_map_granularity))
-                       ? _memory_span_size
-                       : 0;
-  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
-#if PLATFORM_WINDOWS
-  // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not
-  // allocated unless/until the virtual addresses are actually accessed"
-  void *ptr = VirtualAlloc(0, size + padding,
-                           (_memory_huge_pages ? MEM_LARGE_PAGES : 0) |
-                               MEM_RESERVE | MEM_COMMIT,
-                           PAGE_READWRITE);
-  if (!ptr) {
-    if (_memory_config.map_fail_callback) {
-      if (_memory_config.map_fail_callback(size + padding))
-        return _rpmalloc_mmap_os(size, offset);
-    } else {
-      rpmalloc_assert(ptr, "Failed to map virtual memory block");
-    }
-    return 0;
-  }
-#else
-  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
-#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
-  int fd = (int)VM_MAKE_TAG(240U);
-  if (_memory_huge_pages)
-    fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
-  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
-#elif defined(MAP_HUGETLB)
-  void *ptr = mmap(0, size + padding,
-                   PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
-                   (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
-#if defined(MADV_HUGEPAGE)
-  // In some configurations, huge pages allocations might fail thus
-  // we fallback to normal allocations and promote the region as transparent
-  // huge page
-  if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) {
-    ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
-    if (ptr && ptr != MAP_FAILED) {
-      int prm = madvise(ptr, size + padding, MADV_HUGEPAGE);
-      (void)prm;
-      rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
-    }
-  }
-#endif
-  _rpmalloc_set_name(ptr, size + padding);
-#elif defined(MAP_ALIGNED)
-  const size_t align =
-      (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
-  void *ptr =
-      mmap(0, size + padding, PROT_READ | PROT_WRITE,
-           (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
-#elif defined(MAP_ALIGN)
-  caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
-  void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE,
-                   (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
-#else
-  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
-#endif
-  if ((ptr == MAP_FAILED) || !ptr) {
-    if (_memory_config.map_fail_callback) {
-      if (_memory_config.map_fail_callback(size + padding))
-        return _rpmalloc_mmap_os(size, offset);
-    } else if (errno != ENOMEM) {
-      rpmalloc_assert((ptr != MAP_FAILED) && ptr,
-                      "Failed to map virtual memory block");
-    }
-    return 0;
-  }
-#endif
-  _rpmalloc_stat_add(&_mapped_pages_os,
-                     (int32_t)((size + padding) >> _memory_page_size_shift));
-  if (padding) {
-    size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
-    rpmalloc_assert(final_padding <= _memory_span_size,
-                    "Internal failure in padding");
-    rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
-    rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
-    ptr = pointer_offset(ptr, final_padding);
-    *offset = final_padding >> 3;
-  }
-  rpmalloc_assert((size < _memory_span_size) ||
-                      !((uintptr_t)ptr & ~_memory_span_mask),
-                  "Internal failure in padding");
-  return ptr;
-}
-
-//! Default implementation to unmap pages from virtual memory
-static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
-                               size_t release) {
-  rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
-  rpmalloc_assert(!release || (release >= _memory_page_size),
-                  "Invalid unmap size");
-  rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
-  if (release && offset) {
-    offset <<= 3;
-    address = pointer_offset(address, -(int32_t)offset);
-    if ((release >= _memory_span_size) &&
-        (_memory_span_size > _memory_map_granularity)) {
-      // Padding is always one span size
-      release += _memory_span_size;
-    }
-  }
-#if !DISABLE_UNMAP
-#if PLATFORM_WINDOWS
-  if (!VirtualFree(address, release ? 0 : size,
-                   release ? MEM_RELEASE : MEM_DECOMMIT)) {
-    rpmalloc_assert(0, "Failed to unmap virtual memory block");
-  }
-#else
-  if (release) {
-    if (munmap(address, release)) {
-      rpmalloc_assert(0, "Failed to unmap virtual memory block");
-    }
-  } else {
-#if defined(MADV_FREE_REUSABLE)
-    int ret;
-    while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 &&
-           (errno == EAGAIN))
-      errno = 0;
-    if ((ret == -1) && (errno != 0)) {
-#elif defined(MADV_DONTNEED)
-    if (madvise(address, size, MADV_DONTNEED)) {
-#elif defined(MADV_PAGEOUT)
-    if (madvise(address, size, MADV_PAGEOUT)) {
-#elif defined(MADV_FREE)
-    if (madvise(address, size, MADV_FREE)) {
-#else
-    if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
-#endif
-      rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
-    }
-  }
-#endif
-#endif
-  if (release)
-    _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
-}
-
-static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
-                                                         span_t *subspan,
-                                                         size_t span_count);
-
-//! Use global reserved spans to fulfill a memory map request (reserve size must
-//! be checked by caller)
-static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) {
-  span_t *span = _memory_global_reserve;
-  _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master,
-                                               span, span_count);
-  _memory_global_reserve_count -= span_count;
-  if (_memory_global_reserve_count)
-    _memory_global_reserve =
-        (span_t *)pointer_offset(span, span_count << _memory_span_size_shift);
-  else
-    _memory_global_reserve = 0;
-  return span;
-}
-
-//! Store the given spans as global reserve (must only be called from within new
-//! heap allocation, not thread safe)
-static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve,
-                                                size_t reserve_span_count) {
-  _memory_global_reserve_master = master;
-  _memory_global_reserve_count = reserve_span_count;
-  _memory_global_reserve = reserve;
-}
-
-////////////
-///
-/// Span linked list management
-///
-//////
-
-//! Add a span to double linked list at the head
-static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
-  if (*head)
-    (*head)->prev = span;
-  span->next = *head;
-  *head = span;
-}
-
-//! Pop head span from double linked list
-static void _rpmalloc_span_double_link_list_pop_head(span_t **head,
-                                                     span_t *span) {
-  rpmalloc_assert(*head == span, "Linked list corrupted");
-  span = *head;
-  *head = span->next;
-}
-
-//! Remove a span from double linked list
-static void _rpmalloc_span_double_link_list_remove(span_t **head,
-                                                   span_t *span) {
-  rpmalloc_assert(*head, "Linked list corrupted");
-  if (*head == span) {
-    *head = span->next;
-  } else {
-    span_t *next_span = span->next;
-    span_t *prev_span = span->prev;
-    prev_span->next = next_span;
-    if (EXPECTED(next_span != 0))
-      next_span->prev = prev_span;
-  }
-}
-
-////////////
-///
-/// Span control
-///
-//////
-
-static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
-
-static void _rpmalloc_heap_finalize(heap_t *heap);
-
-static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
-                                              span_t *reserve,
-                                              size_t reserve_span_count);
-
-//! Declare the span to be a subspan and store distance from master span and
-//! span count
-static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
-                                                         span_t *subspan,
-                                                         size_t span_count) {
-  rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
-                  "Span master pointer and/or flag mismatch");
-  if (subspan != master) {
-    subspan->flags = SPAN_FLAG_SUBSPAN;
-    subspan->offset_from_master =
-        (uint32_t)((uintptr_t)pointer_diff(subspan, master) >>
-                   _memory_span_size_shift);
-    subspan->align_offset = 0;
-  }
-  subspan->span_count = (uint32_t)span_count;
-}
-
-//! Use reserved spans to fulfill a memory map request (reserve size must be
-//! checked by caller)
-static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap,
-                                               size_t span_count) {
-  // Update the heap span reserve
-  span_t *span = heap->span_reserve;
-  heap->span_reserve =
-      (span_t *)pointer_offset(span, span_count * _memory_span_size);
-  heap->spans_reserved -= (uint32_t)span_count;
-
-  _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span,
-                                               span_count);
-  if (span_count <= LARGE_CLASS_COUNT)
-    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
-
-  return span;
-}
-
-//! Get the aligned number of spans to map in based on wanted count, configured
-//! mapping granularity and the page size
-static size_t _rpmalloc_span_align_count(size_t span_count) {
-  size_t request_count = (span_count > _memory_span_map_count)
-                             ? span_count
-                             : _memory_span_map_count;
-  if ((_memory_page_size > _memory_span_size) &&
-      ((request_count * _memory_span_size) % _memory_page_size))
-    request_count +=
-        _memory_span_map_count - (request_count % _memory_span_map_count);
-  return request_count;
-}
-
-//! Setup a newly mapped span
-static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
-                                      size_t span_count, size_t align_offset) {
-  span->total_spans = (uint32_t)total_span_count;
-  span->span_count = (uint32_t)span_count;
-  span->align_offset = (uint32_t)align_offset;
-  span->flags = SPAN_FLAG_MASTER;
-  atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
-}
-
-static void _rpmalloc_span_unmap(span_t *span);
-
-//! Map an aligned set of spans, taking configured mapping granularity and the
-//! page size into account
-static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap,
-                                                size_t span_count) {
-  // If we already have some, but not enough, reserved spans, release those to
-  // heap cache and map a new full set of spans. Otherwise we would waste memory
-  // if page size > span size (huge pages)
-  size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
-  size_t align_offset = 0;
-  span_t *span = (span_t *)_rpmalloc_mmap(
-      aligned_span_count * _memory_span_size, &align_offset);
-  if (!span)
-    return 0;
-  _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
-  _rpmalloc_stat_inc(&_master_spans);
-  if (span_count <= LARGE_CLASS_COUNT)
-    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
-  if (aligned_span_count > span_count) {
-    span_t *reserved_spans =
-        (span_t *)pointer_offset(span, span_count * _memory_span_size);
-    size_t reserved_count = aligned_span_count - span_count;
-    if (heap->spans_reserved) {
-      _rpmalloc_span_mark_as_subspan_unless_master(
-          heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
-      _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
-    }
-    if (reserved_count > _memory_heap_reserve_count) {
-      // If huge pages or eager spam map count, the global reserve spin lock is
-      // held by caller, _rpmalloc_span_map
-      rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
-                      "Global spin lock not held as expected");
-      size_t remain_count = reserved_count - _memory_heap_reserve_count;
-      reserved_count = _memory_heap_reserve_count;
-      span_t *remain_span = (span_t *)pointer_offset(
-          reserved_spans, reserved_count * _memory_span_size);
-      if (_memory_global_reserve) {
-        _rpmalloc_span_mark_as_subspan_unless_master(
-            _memory_global_reserve_master, _memory_global_reserve,
-            _memory_global_reserve_count);
-        _rpmalloc_span_unmap(_memory_global_reserve);
-      }
-      _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
-    }
-    _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans,
-                                      reserved_count);
-  }
-  return span;
-}
-
-//! Map in memory pages for the given number of spans (or use previously
-//! reserved pages)
-static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
-  if (span_count <= heap->spans_reserved)
-    return _rpmalloc_span_map_from_reserve(heap, span_count);
-  span_t *span = 0;
-  int use_global_reserve =
-      (_memory_page_size > _memory_span_size) ||
-      (_memory_span_map_count > _memory_heap_reserve_count);
-  if (use_global_reserve) {
-    // If huge pages, make sure only one thread maps more memory to avoid bloat
-    while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
-      _rpmalloc_spin();
-    if (_memory_global_reserve_count >= span_count) {
-      size_t reserve_count =
-          (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
-      if (_memory_global_reserve_count < reserve_count)
-        reserve_count = _memory_global_reserve_count;
-      span = _rpmalloc_global_get_reserved_spans(reserve_count);
-      if (span) {
-        if (reserve_count > span_count) {
-          span_t *reserved_span = (span_t *)pointer_offset(
-              span, span_count << _memory_span_size_shift);
-          _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master,
-                                            reserved_span,
-                                            reserve_count - span_count);
-        }
-        // Already marked as subspan in _rpmalloc_global_get_reserved_spans
-        span->span_count = (uint32_t)span_count;
-      }
-    }
-  }
-  if (!span)
-    span = _rpmalloc_span_map_aligned_count(heap, span_count);
-  if (use_global_reserve)
-    atomic_store32_release(&_memory_global_lock, 0);
-  return span;
-}
-
-//! Unmap memory pages for the given number of spans (or mark as unused if no
-//! partial unmappings)
-static void _rpmalloc_span_unmap(span_t *span) {
-  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
-                      (span->flags & SPAN_FLAG_SUBSPAN),
-                  "Span flag corrupted");
-  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
-                      !(span->flags & SPAN_FLAG_SUBSPAN),
-                  "Span flag corrupted");
-
-  int is_master = !!(span->flags & SPAN_FLAG_MASTER);
-  span_t *master =
-      is_master ? span
-                : ((span_t *)pointer_offset(
-                      span, -(intptr_t)((uintptr_t)span->offset_from_master *
-                                        _memory_span_size)));
-  rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
-                  "Span flag corrupted");
-  rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
-
-  size_t span_count = span->span_count;
-  if (!is_master) {
-    // Directly unmap subspans (unless huge pages, in which case we defer and
-    // unmap entire page range with master)
-    rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
-    if (_memory_span_size >= _memory_page_size)
-      _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
-  } else {
-    // Special double flag to denote an unmapped master
-    // It must be kept in memory since span header must be used
-    span->flags |=
-        SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER;
-    _rpmalloc_stat_add(&_unmapped_master_spans, 1);
-  }
-
-  if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
-    // Everything unmapped, unmap the master span with release flag to unmap the
-    // entire range of the super span
-    rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
-                        !!(master->flags & SPAN_FLAG_SUBSPAN),
-                    "Span flag corrupted");
-    size_t unmap_count = master->span_count;
-    if (_memory_span_size < _memory_page_size)
-      unmap_count = master->total_spans;
-    _rpmalloc_stat_sub(&_master_spans, 1);
-    _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
-    _rpmalloc_unmap(master, unmap_count * _memory_span_size,
-                    master->align_offset,
-                    (size_t)master->total_spans * _memory_span_size);
-  }
-}
-
-//! Move the span (used for small or medium allocations) to the heap thread
-//! cache
-static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
-  rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
-  rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT,
-                  "Invalid span size class");
-  rpmalloc_assert(span->span_count == 1, "Invalid span count");
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
-  atomic_decr32(&heap->span_use[0].current);
-#endif
-  _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
-  if (!heap->finalize) {
-    _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
-    _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
-    if (heap->size_class[span->size_class].cache)
-      _rpmalloc_heap_cache_insert(heap,
-                                  heap->size_class[span->size_class].cache);
-    heap->size_class[span->size_class].cache = span;
-  } else {
-    _rpmalloc_span_unmap(span);
-  }
-}
-
-//! Initialize a (partial) free list up to next system memory page, while
-//! reserving the first block as allocated, returning number of blocks in list
-static uint32_t free_list_partial_init(void **list, void **first_block,
-                                       void *page_start, void *block_start,
-                                       uint32_t block_count,
-                                       uint32_t block_size) {
-  rpmalloc_assert(block_count, "Internal failure");
-  *first_block = block_start;
-  if (block_count > 1) {
-    void *free_block = pointer_offset(block_start, block_size);
-    void *block_end =
-        pointer_offset(block_start, (size_t)block_size * block_count);
-    // If block size is less than half a memory page, bound init to next memory
-    // page boundary
-    if (block_size < (_memory_page_size >> 1)) {
-      void *page_end = pointer_offset(page_start, _memory_page_size);
-      if (page_end < block_end)
-        block_end = page_end;
-    }
-    *list = free_block;
-    block_count = 2;
-    void *next_block = pointer_offset(free_block, block_size);
-    while (next_block < block_end) {
-      *((void **)free_block) = next_block;
-      free_block = next_block;
-      ++block_count;
-      next_block = pointer_offset(next_block, block_size);
-    }
-    *((void **)free_block) = 0;
-  } else {
-    *list = 0;
-  }
-  return block_count;
-}
-
-//! Initialize an unused span (from cache or mapped) to be new active span,
-//! putting the initial free list in heap class free list
-static void *_rpmalloc_span_initialize_new(heap_t *heap,
-                                           heap_size_class_t *heap_size_class,
-                                           span_t *span, uint32_t class_idx) {
-  rpmalloc_assert(span->span_count == 1, "Internal failure");
-  size_class_t *size_class = _memory_size_class + class_idx;
-  span->size_class = class_idx;
-  span->heap = heap;
-  span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
-  span->block_size = size_class->block_size;
-  span->block_count = size_class->block_count;
-  span->free_list = 0;
-  span->list_size = 0;
-  atomic_store_ptr_release(&span->free_list_deferred, 0);
-
-  // Setup free list. Only initialize one system page worth of free blocks in
-  // list
-  void *block;
-  span->free_list_limit =
-      free_list_partial_init(&heap_size_class->free_list, &block, span,
-                             pointer_offset(span, SPAN_HEADER_SIZE),
-                             size_class->block_count, size_class->block_size);
-  // Link span as partial if there remains blocks to be initialized as free
-  // list, or full if fully initialized
-  if (span->free_list_limit < span->block_count) {
-    _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
-    span->used_count = span->free_list_limit;
-  } else {
-#if RPMALLOC_FIRST_CLASS_HEAPS
-    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
-#endif
-    ++heap->full_span_count;
-    span->used_count = span->block_count;
-  }
-  return block;
-}
-
-static void _rpmalloc_span_extract_free_list_deferred(span_t *span) {
-  // We need acquire semantics on the CAS operation since we are interested in
-  // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for
-  // further comments on this dependency
-  do {
-    span->free_list =
-        atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
-  } while (span->free_list == INVALID_POINTER);
-  span->used_count -= span->list_size;
-  span->list_size = 0;
-  atomic_store_ptr_release(&span->free_list_deferred, 0);
-}
-
-static int _rpmalloc_span_is_fully_utilized(span_t *span) {
-  rpmalloc_assert(span->free_list_limit <= span->block_count,
-                  "Span free list corrupted");
-  return !span->free_list && (span->free_list_limit >= span->block_count);
-}
-
-static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
-                                   span_t **list_head) {
-  void *free_list = heap->size_class[iclass].free_list;
-  span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask);
-  if (span == class_span) {
-    // Adopt the heap class free list back into the span free list
-    void *block = span->free_list;
-    void *last_block = 0;
-    while (block) {
-      last_block = block;
-      block = *((void **)block);
-    }
-    uint32_t free_count = 0;
-    block = free_list;
-    while (block) {
-      ++free_count;
-      block = *((void **)block);
-    }
-    if (last_block) {
-      *((void **)last_block) = free_list;
-    } else {
-      span->free_list = free_list;
-    }
-    heap->size_class[iclass].free_list = 0;
-    span->used_count -= free_count;
-  }
-  // If this assert triggers you have memory leaks
-  rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
-  if (span->list_size == span->used_count) {
-    _rpmalloc_stat_dec(&heap->span_use[0].current);
-    _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
-    // This function only used for spans in double linked lists
-    if (list_head)
-      _rpmalloc_span_double_link_list_remove(list_head, span);
-    _rpmalloc_span_unmap(span);
-    return 1;
-  }
-  return 0;
-}
-
-////////////
-///
-/// Global cache
-///
-//////
-
-#if ENABLE_GLOBAL_CACHE
-
-//! Finalize a global cache
-static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
-  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
-    _rpmalloc_spin();
-
-  for (size_t ispan = 0; ispan < cache->count; ++ispan)
-    _rpmalloc_span_unmap(cache->span[ispan]);
-  cache->count = 0;
-
-  while (cache->overflow) {
-    span_t *span = cache->overflow;
-    cache->overflow = span->next;
-    _rpmalloc_span_unmap(span);
-  }
-
-  atomic_store32_release(&cache->lock, 0);
-}
-
-static void _rpmalloc_global_cache_insert_spans(span_t **span,
-                                                size_t span_count,
-                                                size_t count) {
-  const size_t cache_limit =
-      (span_count == 1) ? GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE
-                        : GLOBAL_CACHE_MULTIPLIER *
-                              (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
-
-  global_cache_t *cache = &_memory_span_cache[span_count - 1];
-
-  size_t insert_count = count;
-  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
-    _rpmalloc_spin();
-
-#if ENABLE_STATISTICS
-  cache->insert_count += count;
-#endif
-  if ((cache->count + insert_count) > cache_limit)
-    insert_count = cache_limit - cache->count;
-
-  memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count);
-  cache->count += (uint32_t)insert_count;
-
-#if ENABLE_UNLIMITED_CACHE
-  while (insert_count < count) {
-#else
-  // Enable unlimited cache if huge pages, or we will leak since it is unlikely
-  // that an entire huge page will be unmapped, and we're unable to partially
-  // decommit a huge page
-  while ((_memory_page_size > _memory_span_size) && (insert_count < count)) {
-#endif
-    span_t *current_span = span[insert_count++];
-    current_span->next = cache->overflow;
-    cache->overflow = current_span;
-  }
-  atomic_store32_release(&cache->lock, 0);
-
-  span_t *keep = 0;
-  for (size_t ispan = insert_count; ispan < count; ++ispan) {
-    span_t *current_span = span[ispan];
-    // Keep master spans that has remaining subspans to avoid dangling them
-    if ((current_span->flags & SPAN_FLAG_MASTER) &&
-        (atomic_load32(&current_span->remaining_spans) >
-         (int32_t)current_span->span_count)) {
-      current_span->next = keep;
-      keep = current_span;
-    } else {
-      _rpmalloc_span_unmap(current_span);
-    }
-  }
-
-  if (keep) {
-    while (!atomic_cas32_acquire(&cache->lock, 1, 0))
-      _rpmalloc_spin();
-
-    size_t islot = 0;
-    while (keep) {
-      for (; islot < cache->count; ++islot) {
-        span_t *current_span = cache->span[islot];
-        if (!(current_span->flags & SPAN_FLAG_MASTER) ||
-            ((current_span->flags & SPAN_FLAG_MASTER) &&
-             (atomic_load32(&current_span->remaining_spans) <=
-              (int32_t)current_span->span_count))) {
-          _rpmalloc_span_unmap(current_span);
-          cache->span[islot] = keep;
-          break;
-        }
-      }
-      if (islot == cache->count)
-        break;
-      keep = keep->next;
-    }
-
-    if (keep) {
-      span_t *tail = keep;
-      while (tail->next)
-        tail = tail->next;
-      tail->next = cache->overflow;
-      cache->overflow = keep;
-    }
-
-    atomic_store32_release(&cache->lock, 0);
-  }
-}
-
-static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
-                                                   size_t span_count,
-                                                   size_t count) {
-  global_cache_t *cache = &_memory_span_cache[span_count - 1];
-
-  size_t extract_count = 0;
-  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
-    _rpmalloc_spin();
-
-#if ENABLE_STATISTICS
-  cache->extract_count += count;
-#endif
-  size_t want = count - extract_count;
-  if (want > cache->count)
-    want = cache->count;
-
-  memcpy(span + extract_count, cache->span + (cache->count - want),
-         sizeof(span_t *) * want);
-  cache->count -= (uint32_t)want;
-  extract_count += want;
-
-  while ((extract_count < count) && cache->overflow) {
-    span_t *current_span = cache->overflow;
-    span[extract_count++] = current_span;
-    cache->overflow = current_span->next;
-  }
-
-#if ENABLE_ASSERTS
-  for (size_t ispan = 0; ispan < extract_count; ++ispan) {
-    rpmalloc_assert(span[ispan]->span_count == span_count,
-                    "Global cache span count mismatch");
-  }
-#endif
-
-  atomic_store32_release(&cache->lock, 0);
-
-  return extract_count;
-}
-
-#endif
-
-////////////
-///
-/// Heap control
-///
-//////
-
-static void _rpmalloc_deallocate_huge(span_t *);
-
-//! Store the given spans as reserve in the given heap
-static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
-                                              span_t *reserve,
-                                              size_t reserve_span_count) {
-  heap->span_reserve_master = master;
-  heap->span_reserve = reserve;
-  heap->spans_reserved = (uint32_t)reserve_span_count;
-}
-
-//! Adopt the deferred span cache list, optionally extracting the first single
-//! span for immediate re-use
-static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap,
-                                                span_t **single_span) {
-  span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire(
-      &heap->span_free_deferred, 0));
-  while (span) {
-    span_t *next_span = (span_t *)span->free_list;
-    rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
-    if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
-      rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
-      --heap->full_span_count;
-      _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
-#if RPMALLOC_FIRST_CLASS_HEAPS
-      _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
-                                             span);
-#endif
-      _rpmalloc_stat_dec(&heap->span_use[0].current);
-      _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
-      if (single_span && !*single_span)
-        *single_span = span;
-      else
-        _rpmalloc_heap_cache_insert(heap, span);
-    } else {
-      if (span->size_class == SIZE_CLASS_HUGE) {
-        _rpmalloc_deallocate_huge(span);
-      } else {
-        rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE,
-                        "Span size class invalid");
-        rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
-        --heap->full_span_count;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-        _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
-#endif
-        uint32_t idx = span->span_count - 1;
-        _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
-        _rpmalloc_stat_dec(&heap->span_use[idx].current);
-        if (!idx && single_span && !*single_span)
-          *single_span = span;
-        else
-          _rpmalloc_heap_cache_insert(heap, span);
-      }
-    }
-    span = next_span;
-  }
-}
-
-static void _rpmalloc_heap_unmap(heap_t *heap) {
-  if (!heap->master_heap) {
-    if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
-      span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask);
-      _rpmalloc_span_unmap(span);
-    }
-  } else {
-    if (atomic_decr32(&heap->master_heap->child_count) == 0) {
-      _rpmalloc_heap_unmap(heap->master_heap);
-    }
-  }
-}
-
-static void _rpmalloc_heap_global_finalize(heap_t *heap) {
-  if (heap->finalize++ > 1) {
-    --heap->finalize;
-    return;
-  }
-
-  _rpmalloc_heap_finalize(heap);
-
-#if ENABLE_THREAD_CACHE
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    span_cache_t *span_cache;
-    if (!iclass)
-      span_cache = &heap->span_cache;
-    else
-      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
-    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
-      _rpmalloc_span_unmap(span_cache->span[ispan]);
-    span_cache->count = 0;
-  }
-#endif
-
-  if (heap->full_span_count) {
-    --heap->finalize;
-    return;
-  }
-
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    if (heap->size_class[iclass].free_list ||
-        heap->size_class[iclass].partial_span) {
-      --heap->finalize;
-      return;
-    }
-  }
-  // Heap is now completely free, unmap and remove from heap list
-  size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
-  heap_t *list_heap = _memory_heaps[list_idx];
-  if (list_heap == heap) {
-    _memory_heaps[list_idx] = heap->next_heap;
-  } else {
-    while (list_heap->next_heap != heap)
-      list_heap = list_heap->next_heap;
-    list_heap->next_heap = heap->next_heap;
-  }
-
-  _rpmalloc_heap_unmap(heap);
-}
-
-//! Insert a single span into thread heap cache, releasing to global cache if
-//! overflow
-static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
-  if (UNEXPECTED(heap->finalize != 0)) {
-    _rpmalloc_span_unmap(span);
-    _rpmalloc_heap_global_finalize(heap);
-    return;
-  }
-#if ENABLE_THREAD_CACHE
-  size_t span_count = span->span_count;
-  _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
-  if (span_count == 1) {
-    span_cache_t *span_cache = &heap->span_cache;
-    span_cache->span[span_cache->count++] = span;
-    if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
-      const size_t remain_count =
-          MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
-#if ENABLE_GLOBAL_CACHE
-      _rpmalloc_stat_add64(&heap->thread_to_global,
-                           THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
-      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
-                         THREAD_SPAN_CACHE_TRANSFER);
-      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
-                                          span_count,
-                                          THREAD_SPAN_CACHE_TRANSFER);
-#else
-      for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
-        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
-#endif
-      span_cache->count = remain_count;
-    }
-  } else {
-    size_t cache_idx = span_count - 2;
-    span_large_cache_t *span_cache = heap->span_large_cache + cache_idx;
-    span_cache->span[span_cache->count++] = span;
-    const size_t cache_limit =
-        (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
-    if (span_cache->count == cache_limit) {
-      const size_t transfer_limit = 2 + (cache_limit >> 2);
-      const size_t transfer_count =
-          (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit
-               ? THREAD_SPAN_LARGE_CACHE_TRANSFER
-               : transfer_limit);
-      const size_t remain_count = cache_limit - transfer_count;
-#if ENABLE_GLOBAL_CACHE
-      _rpmalloc_stat_add64(&heap->thread_to_global,
-                           transfer_count * span_count * _memory_span_size);
-      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
-                         transfer_count);
-      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
-                                          span_count, transfer_count);
-#else
-      for (size_t ispan = 0; ispan < transfer_count; ++ispan)
-        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
-#endif
-      span_cache->count = remain_count;
-    }
-  }
-#else
-  (void)sizeof(heap);
-  _rpmalloc_span_unmap(span);
-#endif
-}
-
-//! Extract the given number of spans from the different cache levels
-static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap,
-                                                   size_t span_count) {
-  span_t *span = 0;
-#if ENABLE_THREAD_CACHE
-  span_cache_t *span_cache;
-  if (span_count == 1)
-    span_cache = &heap->span_cache;
-  else
-    span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
-  if (span_cache->count) {
-    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
-    return span_cache->span[--span_cache->count];
-  }
-#endif
-  return span;
-}
-
-static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap,
-                                                            size_t span_count) {
-  span_t *span = 0;
-  if (span_count == 1) {
-    _rpmalloc_heap_cache_adopt_deferred(heap, &span);
-  } else {
-    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
-  }
-  return span;
-}
-
-static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap,
-                                               size_t span_count) {
-  if (heap->spans_reserved >= span_count)
-    return _rpmalloc_span_map(heap, span_count);
-  return 0;
-}
-
-//! Extract a span from the global cache
-static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap,
-                                                   size_t span_count) {
-#if ENABLE_GLOBAL_CACHE
-#if ENABLE_THREAD_CACHE
-  span_cache_t *span_cache;
-  size_t wanted_count;
-  if (span_count == 1) {
-    span_cache = &heap->span_cache;
-    wanted_count = THREAD_SPAN_CACHE_TRANSFER;
-  } else {
-    span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
-    wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
-  }
-  span_cache->count = _rpmalloc_global_cache_extract_spans(
-      span_cache->span, span_count, wanted_count);
-  if (span_cache->count) {
-    _rpmalloc_stat_add64(&heap->global_to_thread,
-                         span_count * span_cache->count * _memory_span_size);
-    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
-                       span_cache->count);
-    return span_cache->span[--span_cache->count];
-  }
-#else
-  span_t *span = 0;
-  size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
-  if (count) {
-    _rpmalloc_stat_add64(&heap->global_to_thread,
-                         span_count * count * _memory_span_size);
-    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
-                       count);
-    return span;
-  }
-#endif
-#endif
-  (void)sizeof(heap);
-  (void)sizeof(span_count);
-  return 0;
-}
-
-static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
-                                          uint32_t class_idx) {
-  (void)sizeof(heap);
-  (void)sizeof(span_count);
-  (void)sizeof(class_idx);
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
-  uint32_t idx = (uint32_t)span_count - 1;
-  uint32_t current_count =
-      (uint32_t)atomic_incr32(&heap->span_use[idx].current);
-  if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
-    atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
-  _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
-                          heap->size_class_use[class_idx].spans_peak);
-#endif
-}
-
-//! Get a span from one of the cache levels (thread cache, reserved, global
-//! cache) or fallback to mapping more memory
-static span_t *
-_rpmalloc_heap_extract_new_span(heap_t *heap,
-                                heap_size_class_t *heap_size_class,
-                                size_t span_count, uint32_t class_idx) {
-  span_t *span;
-#if ENABLE_THREAD_CACHE
-  if (heap_size_class && heap_size_class->cache) {
-    span = heap_size_class->cache;
-    heap_size_class->cache =
-        (heap->span_cache.count
-             ? heap->span_cache.span[--heap->span_cache.count]
-             : 0);
-    _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-    return span;
-  }
-#endif
-  (void)sizeof(class_idx);
-  // Allow 50% overhead to increase cache hits
-  size_t base_span_count = span_count;
-  size_t limit_span_count =
-      (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
-  if (limit_span_count > LARGE_CLASS_COUNT)
-    limit_span_count = LARGE_CLASS_COUNT;
-  do {
-    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
-    if (EXPECTED(span != 0)) {
-      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
-      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-      return span;
-    }
-    span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
-    if (EXPECTED(span != 0)) {
-      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
-      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-      return span;
-    }
-    span = _rpmalloc_heap_global_cache_extract(heap, span_count);
-    if (EXPECTED(span != 0)) {
-      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
-      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-      return span;
-    }
-    span = _rpmalloc_heap_reserved_extract(heap, span_count);
-    if (EXPECTED(span != 0)) {
-      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
-      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-      return span;
-    }
-    ++span_count;
-  } while (span_count <= limit_span_count);
-  // Final fallback, map in more virtual memory
-  span = _rpmalloc_span_map(heap, base_span_count);
-  _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx);
-  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
-  return span;
-}
-
-static void _rpmalloc_heap_initialize(heap_t *heap) {
-  _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
-  // Get a new heap ID
-  heap->id = 1 + atomic_incr32(&_memory_heap_id);
-
-  // Link in heap in heap ID map
-  size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
-  heap->next_heap = _memory_heaps[list_idx];
-  _memory_heaps[list_idx] = heap;
-}
-
-static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
-  heap->owner_thread = (uintptr_t)-1;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  heap_t **heap_list =
-      (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
-#else
-  (void)sizeof(first_class);
-  heap_t **heap_list = &_memory_orphan_heaps;
-#endif
-  heap->next_orphan = *heap_list;
-  *heap_list = heap;
-}
-
-//! Allocate a new heap from newly mapped memory pages
-static heap_t *_rpmalloc_heap_allocate_new(void) {
-  // Map in pages for a 16 heaps. If page size is greater than required size for
-  // this, map a page and use first part for heaps and remaining part for spans
-  // for allocations. Adds a lot of complexity, but saves a lot of memory on
-  // systems where page size > 64 spans (4MiB)
-  size_t heap_size = sizeof(heap_t);
-  size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
-  size_t request_heap_count = 16;
-  size_t heap_span_count = ((aligned_heap_size * request_heap_count) +
-                            sizeof(span_t) + _memory_span_size - 1) /
-                           _memory_span_size;
-  size_t block_size = _memory_span_size * heap_span_count;
-  size_t span_count = heap_span_count;
-  span_t *span = 0;
-  // If there are global reserved spans, use these first
-  if (_memory_global_reserve_count >= heap_span_count) {
-    span = _rpmalloc_global_get_reserved_spans(heap_span_count);
-  }
-  if (!span) {
-    if (_memory_page_size > block_size) {
-      span_count = _memory_page_size / _memory_span_size;
-      block_size = _memory_page_size;
-      // If using huge pages, make sure to grab enough heaps to avoid
-      // reallocating a huge page just to serve new heaps
-      size_t possible_heap_count =
-          (block_size - sizeof(span_t)) / aligned_heap_size;
-      if (possible_heap_count >= (request_heap_count * 16))
-        request_heap_count *= 16;
-      else if (possible_heap_count < request_heap_count)
-        request_heap_count = possible_heap_count;
-      heap_span_count = ((aligned_heap_size * request_heap_count) +
-                         sizeof(span_t) + _memory_span_size - 1) /
-                        _memory_span_size;
-    }
-
-    size_t align_offset = 0;
-    span = (span_t *)_rpmalloc_mmap(block_size, &align_offset);
-    if (!span)
-      return 0;
-
-    // Master span will contain the heaps
-    _rpmalloc_stat_inc(&_master_spans);
-    _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
-  }
-
-  size_t remain_size = _memory_span_size - sizeof(span_t);
-  heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t));
-  _rpmalloc_heap_initialize(heap);
-
-  // Put extra heaps as orphans
-  size_t num_heaps = remain_size / aligned_heap_size;
-  if (num_heaps < request_heap_count)
-    num_heaps = request_heap_count;
-  atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
-  heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
-  while (num_heaps > 1) {
-    _rpmalloc_heap_initialize(extra_heap);
-    extra_heap->master_heap = heap;
-    _rpmalloc_heap_orphan(extra_heap, 1);
-    extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
-    --num_heaps;
-  }
-
-  if (span_count > heap_span_count) {
-    // Cap reserved spans
-    size_t remain_count = span_count - heap_span_count;
-    size_t reserve_count =
-        (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count
-                                                   : remain_count);
-    span_t *remain_span =
-        (span_t *)pointer_offset(span, heap_span_count * _memory_span_size);
-    _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
-
-    if (remain_count > reserve_count) {
-      // Set to global reserved spans
-      remain_span = (span_t *)pointer_offset(remain_span,
-                                             reserve_count * _memory_span_size);
-      reserve_count = remain_count - reserve_count;
-      _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count);
-    }
-  }
-
-  return heap;
-}
-
-static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) {
-  heap_t *heap = *heap_list;
-  *heap_list = (heap ? heap->next_orphan : 0);
-  return heap;
-}
-
-//! Allocate a new heap, potentially reusing a previously orphaned heap
-static heap_t *_rpmalloc_heap_allocate(int first_class) {
-  heap_t *heap = 0;
-  while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
-    _rpmalloc_spin();
-  if (first_class == 0)
-    heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  if (!heap)
-    heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
-#endif
-  if (!heap)
-    heap = _rpmalloc_heap_allocate_new();
-  atomic_store32_release(&_memory_global_lock, 0);
-  if (heap)
-    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-  return heap;
-}
-
-static void _rpmalloc_heap_release(void *heapptr, int first_class,
-                                   int release_cache) {
-  heap_t *heap = (heap_t *)heapptr;
-  if (!heap)
-    return;
-  // Release thread cache spans back to global cache
-  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-  if (release_cache || heap->finalize) {
-#if ENABLE_THREAD_CACHE
-    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-      span_cache_t *span_cache;
-      if (!iclass)
-        span_cache = &heap->span_cache;
-      else
-        span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
-      if (!span_cache->count)
-        continue;
-#if ENABLE_GLOBAL_CACHE
-      if (heap->finalize) {
-        for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
-          _rpmalloc_span_unmap(span_cache->span[ispan]);
-      } else {
-        _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
-                                                          (iclass + 1) *
-                                                          _memory_span_size);
-        _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
-                           span_cache->count);
-        _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
-                                            span_cache->count);
-      }
-#else
-      for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
-        _rpmalloc_span_unmap(span_cache->span[ispan]);
-#endif
-      span_cache->count = 0;
-    }
-#endif
-  }
-
-  if (get_thread_heap_raw() == heap)
-    set_thread_heap(0);
-
-#if ENABLE_STATISTICS
-  atomic_decr32(&_memory_active_heaps);
-  rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
-                  "Still active heaps during finalization");
-#endif
-
-  // If we are forcibly terminating with _exit the state of the
-  // lock atomic is unknown and it's best to just go ahead and exit
-  if (get_thread_id() != _rpmalloc_main_thread_id) {
-    while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
-      _rpmalloc_spin();
-  }
-  _rpmalloc_heap_orphan(heap, first_class);
-  atomic_store32_release(&_memory_global_lock, 0);
-}
-
-static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
-  _rpmalloc_heap_release(heapptr, 0, release_cache);
-}
-
-static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
-  _rpmalloc_heap_release_raw(heapptr, 1);
-}
-
-static void _rpmalloc_heap_finalize(heap_t *heap) {
-  if (heap->spans_reserved) {
-    span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
-    _rpmalloc_span_unmap(span);
-    heap->spans_reserved = 0;
-  }
-
-  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    if (heap->size_class[iclass].cache)
-      _rpmalloc_span_unmap(heap->size_class[iclass].cache);
-    heap->size_class[iclass].cache = 0;
-    span_t *span = heap->size_class[iclass].partial_span;
-    while (span) {
-      span_t *next = span->next;
-      _rpmalloc_span_finalize(heap, iclass, span,
-                              &heap->size_class[iclass].partial_span);
-      span = next;
-    }
-    // If class still has a free list it must be a full span
-    if (heap->size_class[iclass].free_list) {
-      span_t *class_span =
-          (span_t *)((uintptr_t)heap->size_class[iclass].free_list &
-                     _memory_span_mask);
-      span_t **list = 0;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-      list = &heap->full_span[iclass];
-#endif
-      --heap->full_span_count;
-      if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
-        if (list)
-          _rpmalloc_span_double_link_list_remove(list, class_span);
-        _rpmalloc_span_double_link_list_add(
-            &heap->size_class[iclass].partial_span, class_span);
-      }
-    }
-  }
-
-#if ENABLE_THREAD_CACHE
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    span_cache_t *span_cache;
-    if (!iclass)
-      span_cache = &heap->span_cache;
-    else
-      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
-    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
-      _rpmalloc_span_unmap(span_cache->span[ispan]);
-    span_cache->count = 0;
-  }
-#endif
-  rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred),
-                  "Heaps still active during finalization");
-}
-
-////////////
-///
-/// Allocation entry points
-///
-//////
-
-//! Pop first block from a free list
-static void *free_list_pop(void **list) {
-  void *block = *list;
-  *list = *((void **)block);
-  return block;
-}
-
-//! Allocate a small/medium sized memory block from the given heap
-static void *_rpmalloc_allocate_from_heap_fallback(
-    heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) {
-  span_t *span = heap_size_class->partial_span;
-  rpmalloc_assume(heap != 0);
-  if (EXPECTED(span != 0)) {
-    rpmalloc_assert(span->block_count ==
-                        _memory_size_class[span->size_class].block_count,
-                    "Span block count corrupted");
-    rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span),
-                    "Internal failure");
-    void *block;
-    if (span->free_list) {
-      // Span local free list is not empty, swap to size class free list
-      block = free_list_pop(&span->free_list);
-      heap_size_class->free_list = span->free_list;
-      span->free_list = 0;
-    } else {
-      // If the span did not fully initialize free list, link up another page
-      // worth of blocks
-      void *block_start = pointer_offset(
-          span, SPAN_HEADER_SIZE +
-                    ((size_t)span->free_list_limit * span->block_size));
-      span->free_list_limit += free_list_partial_init(
-          &heap_size_class->free_list, &block,
-          (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)),
-          block_start, span->block_count - span->free_list_limit,
-          span->block_size);
-    }
-    rpmalloc_assert(span->free_list_limit <= span->block_count,
-                    "Span block count corrupted");
-    span->used_count = span->free_list_limit;
-
-    // Swap in deferred free list if present
-    if (atomic_load_ptr(&span->free_list_deferred))
-      _rpmalloc_span_extract_free_list_deferred(span);
-
-    // If span is still not fully utilized keep it in partial list and early
-    // return block
-    if (!_rpmalloc_span_is_fully_utilized(span))
-      return block;
-
-    // The span is fully utilized, unlink from partial list and add to fully
-    // utilized list
-    _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span,
-                                             span);
-#if RPMALLOC_FIRST_CLASS_HEAPS
-    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
-#endif
-    ++heap->full_span_count;
-    return block;
-  }
-
-  // Find a span in one of the cache levels
-  span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
-  if (EXPECTED(span != 0)) {
-    // Mark span as owned by this heap and set base data, return first block
-    return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
-                                         class_idx);
-  }
-
-  return 0;
-}
-
-//! Allocate a small sized memory block from the given heap
-static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
-  rpmalloc_assert(heap, "No thread heap");
-  // Small sizes have unique size classes
-  const uint32_t class_idx =
-      (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
-  heap_size_class_t *heap_size_class = heap->size_class + class_idx;
-  _rpmalloc_stat_inc_alloc(heap, class_idx);
-  if (EXPECTED(heap_size_class->free_list != 0))
-    return free_list_pop(&heap_size_class->free_list);
-  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
-                                               class_idx);
-}
-
-//! Allocate a medium sized memory block from the given heap
-static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
-  rpmalloc_assert(heap, "No thread heap");
-  // Calculate the size class index and do a dependent lookup of the final class
-  // index (in case of merged classes)
-  const uint32_t base_idx =
-      (uint32_t)(SMALL_CLASS_COUNT +
-                 ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
-  const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
-  heap_size_class_t *heap_size_class = heap->size_class + class_idx;
-  _rpmalloc_stat_inc_alloc(heap, class_idx);
-  if (EXPECTED(heap_size_class->free_list != 0))
-    return free_list_pop(&heap_size_class->free_list);
-  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
-                                               class_idx);
-}
-
-//! Allocate a large sized memory block from the given heap
-static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
-  rpmalloc_assert(heap, "No thread heap");
-  // Calculate number of needed max sized spans (including header)
-  // Since this function is never called if size > LARGE_SIZE_LIMIT
-  // the span_count is guaranteed to be <= LARGE_CLASS_COUNT
-  size += SPAN_HEADER_SIZE;
-  size_t span_count = size >> _memory_span_size_shift;
-  if (size & (_memory_span_size - 1))
-    ++span_count;
-
-  // Find a span in one of the cache levels
-  span_t *span =
-      _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE);
-  if (!span)
-    return span;
-
-  // Mark span as owned by this heap and set base data
-  rpmalloc_assert(span->span_count >= span_count, "Internal failure");
-  span->size_class = SIZE_CLASS_LARGE;
-  span->heap = heap;
-
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
-#endif
-  ++heap->full_span_count;
-
-  return pointer_offset(span, SPAN_HEADER_SIZE);
-}
-
-//! Allocate a huge block by mapping memory pages directly
-static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
-  rpmalloc_assert(heap, "No thread heap");
-  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-  size += SPAN_HEADER_SIZE;
-  size_t num_pages = size >> _memory_page_size_shift;
-  if (size & (_memory_page_size - 1))
-    ++num_pages;
-  size_t align_offset = 0;
-  span_t *span =
-      (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
-  if (!span)
-    return span;
-
-  // Store page count in span_count
-  span->size_class = SIZE_CLASS_HUGE;
-  span->span_count = (uint32_t)num_pages;
-  span->align_offset = (uint32_t)align_offset;
-  span->heap = heap;
-  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
-
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
-#endif
-  ++heap->full_span_count;
-
-  return pointer_offset(span, SPAN_HEADER_SIZE);
-}
-
-//! Allocate a block of the given size
-static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
-  _rpmalloc_stat_add64(&_allocation_counter, 1);
-  if (EXPECTED(size <= SMALL_SIZE_LIMIT))
-    return _rpmalloc_allocate_small(heap, size);
-  else if (size <= _memory_medium_size_limit)
-    return _rpmalloc_allocate_medium(heap, size);
-  else if (size <= LARGE_SIZE_LIMIT)
-    return _rpmalloc_allocate_large(heap, size);
-  return _rpmalloc_allocate_huge(heap, size);
-}
-
-static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
-                                        size_t size) {
-  if (alignment <= SMALL_GRANULARITY)
-    return _rpmalloc_allocate(heap, size);
-
-#if ENABLE_VALIDATE_ARGS
-  if ((size + alignment) < size) {
-    errno = EINVAL;
-    return 0;
-  }
-  if (alignment & (alignment - 1)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-
-  if ((alignment <= SPAN_HEADER_SIZE) &&
-      ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) {
-    // If alignment is less or equal to span header size (which is power of
-    // two), and size aligned to span header size multiples is less than size +
-    // alignment, then use natural alignment of blocks to provide alignment
-    size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) &
-                                      ~(uintptr_t)(SPAN_HEADER_SIZE - 1)
-                                : SPAN_HEADER_SIZE;
-    rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
-                    "Failed alignment calculation");
-    if (multiple_size <= (size + alignment))
-      return _rpmalloc_allocate(heap, multiple_size);
-  }
-
-  void *ptr = 0;
-  size_t align_mask = alignment - 1;
-  if (alignment <= _memory_page_size) {
-    ptr = _rpmalloc_allocate(heap, size + alignment);
-    if ((uintptr_t)ptr & align_mask) {
-      ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
-      // Mark as having aligned blocks
-      span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
-      span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
-    }
-    return ptr;
-  }
-
-  // Fallback to mapping new pages for this request. Since pointers passed
-  // to rpfree must be able to reach the start of the span by bitmasking of
-  // the address with the span size, the returned aligned pointer from this
-  // function must be with a span size of the start of the mapped area.
-  // In worst case this requires us to loop and map pages until we get a
-  // suitable memory address. It also means we can never align to span size
-  // or greater, since the span header will push alignment more than one
-  // span size away from span start (thus causing pointer mask to give us
-  // an invalid span start on free)
-  if (alignment & align_mask) {
-    errno = EINVAL;
-    return 0;
-  }
-  if (alignment >= _memory_span_size) {
-    errno = EINVAL;
-    return 0;
-  }
-
-  size_t extra_pages = alignment / _memory_page_size;
-
-  // Since each span has a header, we will at least need one extra memory page
-  size_t num_pages = 1 + (size / _memory_page_size);
-  if (size & (_memory_page_size - 1))
-    ++num_pages;
-
-  if (extra_pages > num_pages)
-    num_pages = 1 + extra_pages;
-
-  size_t original_pages = num_pages;
-  size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
-  if (limit_pages < (original_pages * 2))
-    limit_pages = original_pages * 2;
-
-  size_t mapped_size, align_offset;
-  span_t *span;
-
-retry:
-  align_offset = 0;
-  mapped_size = num_pages * _memory_page_size;
-
-  span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
-  if (!span) {
-    errno = ENOMEM;
-    return 0;
-  }
-  ptr = pointer_offset(span, SPAN_HEADER_SIZE);
-
-  if ((uintptr_t)ptr & align_mask)
-    ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
-
-  if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
-      (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
-      (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
-    _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
-    ++num_pages;
-    if (num_pages > limit_pages) {
-      errno = EINVAL;
-      return 0;
-    }
-    goto retry;
-  }
-
-  // Store page count in span_count
-  span->size_class = SIZE_CLASS_HUGE;
-  span->span_count = (uint32_t)num_pages;
-  span->align_offset = (uint32_t)align_offset;
-  span->heap = heap;
-  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
-
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
-#endif
-  ++heap->full_span_count;
-
-  _rpmalloc_stat_add64(&_allocation_counter, 1);
-
-  return ptr;
-}
-
-////////////
-///
-/// Deallocation entry points
-///
-//////
-
-//! Deallocate the given small/medium memory block in the current thread local
-//! heap
-static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span,
-                                                        void *block) {
-  heap_t *heap = span->heap;
-  rpmalloc_assert(heap->owner_thread == get_thread_id() ||
-                      !heap->owner_thread || heap->finalize,
-                  "Internal failure");
-  // Add block to free list
-  if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
-    span->used_count = span->block_count;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-    _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
-                                           span);
-#endif
-    _rpmalloc_span_double_link_list_add(
-        &heap->size_class[span->size_class].partial_span, span);
-    --heap->full_span_count;
-  }
-  *((void **)block) = span->free_list;
-  --span->used_count;
-  span->free_list = block;
-  if (UNEXPECTED(span->used_count == span->list_size)) {
-    // If there are no used blocks it is guaranteed that no other external
-    // thread is accessing the span
-    if (span->used_count) {
-      // Make sure we have synchronized the deferred list and list size by using
-      // acquire semantics and guarantee that no external thread is accessing
-      // span concurrently
-      void *free_list;
-      do {
-        free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred,
-                                                INVALID_POINTER);
-      } while (free_list == INVALID_POINTER);
-      atomic_store_ptr_release(&span->free_list_deferred, free_list);
-    }
-    _rpmalloc_span_double_link_list_remove(
-        &heap->size_class[span->size_class].partial_span, span);
-    _rpmalloc_span_release_to_cache(heap, span);
-  }
-}
-
-static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
-  if (span->size_class != SIZE_CLASS_HUGE)
-    _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
-  // This list does not need ABA protection, no mutable side state
-  do {
-    span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred);
-  } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
-}
-
-//! Put the block in the deferred free list of the owning span
-static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span,
-                                                       void *block) {
-  // The memory ordering here is a bit tricky, to avoid having to ABA protect
-  // the deferred free list to avoid desynchronization of list and list size
-  // we need to have acquire semantics on successful CAS of the pointer to
-  // guarantee the list_size variable validity + release semantics on pointer
-  // store
-  void *free_list;
-  do {
-    free_list =
-        atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
-  } while (free_list == INVALID_POINTER);
-  *((void **)block) = free_list;
-  uint32_t free_count = ++span->list_size;
-  int all_deferred_free = (free_count == span->block_count);
-  atomic_store_ptr_release(&span->free_list_deferred, block);
-  if (all_deferred_free) {
-    // Span was completely freed by this block. Due to the INVALID_POINTER spin
-    // lock no other thread can reach this state simultaneously on this span.
-    // Safe to move to owner heap deferred cache
-    _rpmalloc_deallocate_defer_free_span(span->heap, span);
-  }
-}
-
-static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
-  _rpmalloc_stat_inc_free(span->heap, span->size_class);
-  if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
-    // Realign pointer to block start
-    void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
-    uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
-    p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
-  }
-  // Check if block belongs to this heap or if deallocation should be deferred
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  int defer =
-      (span->heap->owner_thread &&
-       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#else
-  int defer =
-      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#endif
-  if (!defer)
-    _rpmalloc_deallocate_direct_small_or_medium(span, p);
-  else
-    _rpmalloc_deallocate_defer_small_or_medium(span, p);
-}
-
-//! Deallocate the given large memory block to the current heap
-static void _rpmalloc_deallocate_large(span_t *span) {
-  rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class");
-  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
-                      !(span->flags & SPAN_FLAG_SUBSPAN),
-                  "Span flag corrupted");
-  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
-                      (span->flags & SPAN_FLAG_SUBSPAN),
-                  "Span flag corrupted");
-  // We must always defer (unless finalizing) if from another heap since we
-  // cannot touch the list or counters of another heap
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  int defer =
-      (span->heap->owner_thread &&
-       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#else
-  int defer =
-      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#endif
-  if (defer) {
-    _rpmalloc_deallocate_defer_free_span(span->heap, span);
-    return;
-  }
-  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
-  --span->heap->full_span_count;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
-#endif
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
-  // Decrease counter
-  size_t idx = span->span_count - 1;
-  atomic_decr32(&span->heap->span_use[idx].current);
-#endif
-  heap_t *heap = span->heap;
-  rpmalloc_assert(heap, "No thread heap");
-#if ENABLE_THREAD_CACHE
-  const int set_as_reserved =
-      ((span->span_count > 1) && (heap->span_cache.count == 0) &&
-       !heap->finalize && !heap->spans_reserved);
-#else
-  const int set_as_reserved =
-      ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
-#endif
-  if (set_as_reserved) {
-    heap->span_reserve = span;
-    heap->spans_reserved = span->span_count;
-    if (span->flags & SPAN_FLAG_MASTER) {
-      heap->span_reserve_master = span;
-    } else { // SPAN_FLAG_SUBSPAN
-      span_t *master = (span_t *)pointer_offset(
-          span,
-          -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
-      heap->span_reserve_master = master;
-      rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
-      rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
-                          (int32_t)span->span_count,
-                      "Master span count corrupted");
-    }
-    _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
-  } else {
-    // Insert into cache list
-    _rpmalloc_heap_cache_insert(heap, span);
-  }
-}
-
-//! Deallocate the given huge span
-static void _rpmalloc_deallocate_huge(span_t *span) {
-  rpmalloc_assert(span->heap, "No span heap");
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  int defer =
-      (span->heap->owner_thread &&
-       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#else
-  int defer =
-      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
-#endif
-  if (defer) {
-    _rpmalloc_deallocate_defer_free_span(span->heap, span);
-    return;
-  }
-  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
-  --span->heap->full_span_count;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
-#endif
-
-  // Oversized allocation, page count is stored in span_count
-  size_t num_pages = span->span_count;
-  _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
-                  num_pages * _memory_page_size);
-  _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
-}
-
-//! Deallocate the given block
-static void _rpmalloc_deallocate(void *p) {
-  _rpmalloc_stat_add64(&_deallocation_counter, 1);
-  // Grab the span (always at start of span, using span alignment)
-  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
-  if (UNEXPECTED(!span))
-    return;
-  if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
-    _rpmalloc_deallocate_small_or_medium(span, p);
-  else if (span->size_class == SIZE_CLASS_LARGE)
-    _rpmalloc_deallocate_large(span);
-  else
-    _rpmalloc_deallocate_huge(span);
-}
-
-////////////
-///
-/// Reallocation entry points
-///
-//////
-
-static size_t _rpmalloc_usable_size(void *p);
-
-//! Reallocate the given block to the given size
-static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
-                                  size_t oldsize, unsigned int flags) {
-  if (p) {
-    // Grab the span using guaranteed span alignment
-    span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
-    if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
-      // Small/medium sized block
-      rpmalloc_assert(span->span_count == 1, "Span counter corrupted");
-      void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
-      uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
-      uint32_t block_idx = block_offset / span->block_size;
-      void *block =
-          pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
-      if (!oldsize)
-        oldsize =
-            (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
-      if ((size_t)span->block_size >= size) {
-        // Still fits in block, never mind trying to save memory, but preserve
-        // data if alignment changed
-        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
-          memmove(block, p, oldsize);
-        return block;
-      }
-    } else if (span->size_class == SIZE_CLASS_LARGE) {
-      // Large block
-      size_t total_size = size + SPAN_HEADER_SIZE;
-      size_t num_spans = total_size >> _memory_span_size_shift;
-      if (total_size & (_memory_span_mask - 1))
-        ++num_spans;
-      size_t current_spans = span->span_count;
-      void *block = pointer_offset(span, SPAN_HEADER_SIZE);
-      if (!oldsize)
-        oldsize = (current_spans * _memory_span_size) -
-                  (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
-      if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
-        // Still fits in block, never mind trying to save memory, but preserve
-        // data if alignment changed
-        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
-          memmove(block, p, oldsize);
-        return block;
-      }
-    } else {
-      // Oversized block
-      size_t total_size = size + SPAN_HEADER_SIZE;
-      size_t num_pages = total_size >> _memory_page_size_shift;
-      if (total_size & (_memory_page_size - 1))
-        ++num_pages;
-      // Page count is stored in span_count
-      size_t current_pages = span->span_count;
-      void *block = pointer_offset(span, SPAN_HEADER_SIZE);
-      if (!oldsize)
-        oldsize = (current_pages * _memory_page_size) -
-                  (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
-      if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
-        // Still fits in block, never mind trying to save memory, but preserve
-        // data if alignment changed
-        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
-          memmove(block, p, oldsize);
-        return block;
-      }
-    }
-  } else {
-    oldsize = 0;
-  }
-
-  if (!!(flags & RPMALLOC_GROW_OR_FAIL))
-    return 0;
-
-  // Size is greater than block size, need to allocate a new block and
-  // deallocate the old Avoid hysteresis by overallocating if increase is small
-  // (below 37%)
-  size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
-  size_t new_size =
-      (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
-  void *block = _rpmalloc_allocate(heap, new_size);
-  if (p && block) {
-    if (!(flags & RPMALLOC_NO_PRESERVE))
-      memcpy(block, p, oldsize < new_size ? oldsize : new_size);
-    _rpmalloc_deallocate(p);
-  }
-
-  return block;
-}
-
-static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
-                                          size_t alignment, size_t size,
-                                          size_t oldsize, unsigned int flags) {
-  if (alignment <= SMALL_GRANULARITY)
-    return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
-
-  int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
-  size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
-  if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
-    if (no_alloc || (size >= (usablesize / 2)))
-      return ptr;
-  }
-  // Aligned alloc marks span as having aligned blocks
-  void *block =
-      (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
-  if (EXPECTED(block != 0)) {
-    if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
-      if (!oldsize)
-        oldsize = usablesize;
-      memcpy(block, ptr, oldsize < size ? oldsize : size);
-    }
-    _rpmalloc_deallocate(ptr);
-  }
-  return block;
-}
-
-////////////
-///
-/// Initialization, finalization and utility
-///
-//////
-
-//! Get the usable size of the given block
-static size_t _rpmalloc_usable_size(void *p) {
-  // Grab the span using guaranteed span alignment
-  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
-  if (span->size_class < SIZE_CLASS_COUNT) {
-    // Small/medium block
-    void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
-    return span->block_size -
-           ((size_t)pointer_diff(p, blocks_start) % span->block_size);
-  }
-  if (span->size_class == SIZE_CLASS_LARGE) {
-    // Large block
-    size_t current_spans = span->span_count;
-    return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
-  }
-  // Oversized block, page count is stored in span_count
-  size_t current_pages = span->span_count;
-  return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
-}
-
-//! Adjust and optimize the size class properties for the given class
-static void _rpmalloc_adjust_size_class(size_t iclass) {
-  size_t block_size = _memory_size_class[iclass].block_size;
-  size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
-
-  _memory_size_class[iclass].block_count = (uint16_t)block_count;
-  _memory_size_class[iclass].class_idx = (uint16_t)iclass;
-
-  // Check if previous size classes can be merged
-  if (iclass >= SMALL_CLASS_COUNT) {
-    size_t prevclass = iclass;
-    while (prevclass > 0) {
-      --prevclass;
-      // A class can be merged if number of pages and number of blocks are equal
-      if (_memory_size_class[prevclass].block_count ==
-          _memory_size_class[iclass].block_count)
-        _rpmalloc_memcpy_const(_memory_size_class + prevclass,
-                               _memory_size_class + iclass,
-                               sizeof(_memory_size_class[iclass]));
-      else
-        break;
-    }
-  }
-}
-
-//! Initialize the allocator and setup global data
-extern inline int rpmalloc_initialize(void) {
-  if (_rpmalloc_initialized) {
-    rpmalloc_thread_initialize();
-    return 0;
-  }
-  return rpmalloc_initialize_config(0);
-}
-
-int rpmalloc_initialize_config(const rpmalloc_config_t *config) {
-  if (_rpmalloc_initialized) {
-    rpmalloc_thread_initialize();
-    return 0;
-  }
-  _rpmalloc_initialized = 1;
-
-  if (config)
-    memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
-  else
-    _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t));
-
-  if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
-    _memory_config.memory_map = _rpmalloc_mmap_os;
-    _memory_config.memory_unmap = _rpmalloc_unmap_os;
-  }
-
-#if PLATFORM_WINDOWS
-  SYSTEM_INFO system_info;
-  memset(&system_info, 0, sizeof(system_info));
-  GetSystemInfo(&system_info);
-  _memory_map_granularity = system_info.dwAllocationGranularity;
-#else
-  _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
-#endif
-
-#if RPMALLOC_CONFIGURABLE
-  _memory_page_size = _memory_config.page_size;
-#else
-  _memory_page_size = 0;
-#endif
-  _memory_huge_pages = 0;
-  if (!_memory_page_size) {
-#if PLATFORM_WINDOWS
-    _memory_page_size = system_info.dwPageSize;
-#else
-    _memory_page_size = _memory_map_granularity;
-    if (_memory_config.enable_huge_pages) {
-#if defined(__linux__)
-      size_t huge_page_size = 0;
-      FILE *meminfo = fopen("/proc/meminfo", "r");
-      if (meminfo) {
-        char line[128];
-        while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
-          line[sizeof(line) - 1] = 0;
-          if (strstr(line, "Hugepagesize:"))
-            huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
-        }
-        fclose(meminfo);
-      }
-      if (huge_page_size) {
-        _memory_huge_pages = 1;
-        _memory_page_size = huge_page_size;
-        _memory_map_granularity = huge_page_size;
-      }
-#elif defined(__FreeBSD__)
-      int rc;
-      size_t sz = sizeof(rc);
-
-      if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 &&
-          rc == 1) {
-        static size_t defsize = 2 * 1024 * 1024;
-        int nsize = 0;
-        size_t sizes[4] = {0};
-        _memory_huge_pages = 1;
-        _memory_page_size = defsize;
-        if ((nsize = getpagesizes(sizes, 4)) >= 2) {
-          nsize--;
-          for (size_t csize = sizes[nsize]; nsize >= 0 && csize;
-               --nsize, csize = sizes[nsize]) {
-            //! Unlikely, but as a precaution..
-            rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
-                            "Invalid page size");
-            if (defsize < csize) {
-              _memory_page_size = csize;
-              break;
-            }
-          }
-        }
-        _memory_map_granularity = _memory_page_size;
-      }
-#elif defined(__APPLE__) || defined(__NetBSD__)
-      _memory_huge_pages = 1;
-      _memory_page_size = 2 * 1024 * 1024;
-      _memory_map_granularity = _memory_page_size;
-#endif
-    }
-#endif
-  } else {
-    if (_memory_config.enable_huge_pages)
-      _memory_huge_pages = 1;
-  }
-
-#if PLATFORM_WINDOWS
-  if (_memory_config.enable_huge_pages) {
-    HANDLE token = 0;
-    size_t large_page_minimum = GetLargePageMinimum();
-    if (large_page_minimum)
-      OpenProcessToken(GetCurrentProcess(),
-                       TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
-    if (token) {
-      LUID luid;
-      if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
-        TOKEN_PRIVILEGES token_privileges;
-        memset(&token_privileges, 0, sizeof(token_privileges));
-        token_privileges.PrivilegeCount = 1;
-        token_privileges.Privileges[0].Luid = luid;
-        token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-        if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
-          if (GetLastError() == ERROR_SUCCESS)
-            _memory_huge_pages = 1;
-        }
-      }
-      CloseHandle(token);
-    }
-    if (_memory_huge_pages) {
-      if (large_page_minimum > _memory_page_size)
-        _memory_page_size = large_page_minimum;
-      if (large_page_minimum > _memory_map_granularity)
-        _memory_map_granularity = large_page_minimum;
-    }
-  }
-#endif
-
-  size_t min_span_size = 256;
-  size_t max_page_size;
-#if UINTPTR_MAX > 0xFFFFFFFF
-  max_page_size = 4096ULL * 1024ULL * 1024ULL;
-#else
-  max_page_size = 4 * 1024 * 1024;
-#endif
-  if (_memory_page_size < min_span_size)
-    _memory_page_size = min_span_size;
-  if (_memory_page_size > max_page_size)
-    _memory_page_size = max_page_size;
-  _memory_page_size_shift = 0;
-  size_t page_size_bit = _memory_page_size;
-  while (page_size_bit != 1) {
-    ++_memory_page_size_shift;
-    page_size_bit >>= 1;
-  }
-  _memory_page_size = ((size_t)1 << _memory_page_size_shift);
-
-#if RPMALLOC_CONFIGURABLE
-  if (!_memory_config.span_size) {
-    _memory_span_size = _memory_default_span_size;
-    _memory_span_size_shift = _memory_default_span_size_shift;
-    _memory_span_mask = _memory_default_span_mask;
-  } else {
-    size_t span_size = _memory_config.span_size;
-    if (span_size > (256 * 1024))
-      span_size = (256 * 1024);
-    _memory_span_size = 4096;
-    _memory_span_size_shift = 12;
-    while (_memory_span_size < span_size) {
-      _memory_span_size <<= 1;
-      ++_memory_span_size_shift;
-    }
-    _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
-  }
-#endif
-
-  _memory_span_map_count =
-      (_memory_config.span_map_count ? _memory_config.span_map_count
-                                     : DEFAULT_SPAN_MAP_COUNT);
-  if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
-    _memory_span_map_count = (_memory_page_size / _memory_span_size);
-  if ((_memory_page_size >= _memory_span_size) &&
-      ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
-    _memory_span_map_count = (_memory_page_size / _memory_span_size);
-  _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT)
-                                   ? DEFAULT_SPAN_MAP_COUNT
-                                   : _memory_span_map_count;
-
-  _memory_config.page_size = _memory_page_size;
-  _memory_config.span_size = _memory_span_size;
-  _memory_config.span_map_count = _memory_span_map_count;
-  _memory_config.enable_huge_pages = _memory_huge_pages;
-
-#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
-    defined(__TINYC__)
-  if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
-    return -1;
-#endif
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-  fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
-#endif
-
-  // Setup all small and medium size classes
-  size_t iclass = 0;
-  _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
-  _rpmalloc_adjust_size_class(iclass);
-  for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
-    size_t size = iclass * SMALL_GRANULARITY;
-    _memory_size_class[iclass].block_size = (uint32_t)size;
-    _rpmalloc_adjust_size_class(iclass);
-  }
-  // At least two blocks per span, then fall back to large allocations
-  _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
-  if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
-    _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
-  for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
-    size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
-    if (size > _memory_medium_size_limit) {
-      _memory_medium_size_limit =
-          SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY);
-      break;
-    }
-    _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
-    _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
-  }
-
-  _memory_orphan_heaps = 0;
-#if RPMALLOC_FIRST_CLASS_HEAPS
-  _memory_first_class_orphan_heaps = 0;
-#endif
-#if ENABLE_STATISTICS
-  atomic_store32(&_memory_active_heaps, 0);
-  atomic_store32(&_mapped_pages, 0);
-  _mapped_pages_peak = 0;
-  atomic_store32(&_master_spans, 0);
-  atomic_store32(&_mapped_total, 0);
-  atomic_store32(&_unmapped_total, 0);
-  atomic_store32(&_mapped_pages_os, 0);
-  atomic_store32(&_huge_pages_current, 0);
-  _huge_pages_peak = 0;
-#endif
-  memset(_memory_heaps, 0, sizeof(_memory_heaps));
-  atomic_store32_release(&_memory_global_lock, 0);
-
-  rpmalloc_linker_reference();
-
-  // Initialize this thread
-  rpmalloc_thread_initialize();
-  return 0;
-}
-
-//! Finalize the allocator
-void rpmalloc_finalize(void) {
-  rpmalloc_thread_finalize(1);
-  // rpmalloc_dump_statistics(stdout);
-
-  if (_memory_global_reserve) {
-    atomic_add32(&_memory_global_reserve_master->remaining_spans,
-                 -(int32_t)_memory_global_reserve_count);
-    _memory_global_reserve_master = 0;
-    _memory_global_reserve_count = 0;
-    _memory_global_reserve = 0;
-  }
-  atomic_store32_release(&_memory_global_lock, 0);
-
-  // Free all thread caches and fully free spans
-  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
-    heap_t *heap = _memory_heaps[list_idx];
-    while (heap) {
-      heap_t *next_heap = heap->next_heap;
-      heap->finalize = 1;
-      _rpmalloc_heap_global_finalize(heap);
-      heap = next_heap;
-    }
-  }
-
-#if ENABLE_GLOBAL_CACHE
-  // Free global caches
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
-    _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
-#endif
-
-#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
-  pthread_key_delete(_memory_thread_heap);
-#endif
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-  FlsFree(fls_key);
-  fls_key = 0;
-#endif
-#if ENABLE_STATISTICS
-  // If you hit these asserts you probably have memory leaks (perhaps global
-  // scope data doing dynamic allocations) or double frees in your code
-  rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
-  rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
-                  "Memory leak detected");
-#endif
-
-  _rpmalloc_initialized = 0;
-}
-
-//! Initialize thread, assign heap
-extern inline void rpmalloc_thread_initialize(void) {
-  if (!get_thread_heap_raw()) {
-    heap_t *heap = _rpmalloc_heap_allocate(0);
-    if (heap) {
-      _rpmalloc_stat_inc(&_memory_active_heaps);
-      set_thread_heap(heap);
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-      FlsSetValue(fls_key, heap);
-#endif
-    }
-  }
-}
-
-//! Finalize thread, orphan heap
-void rpmalloc_thread_finalize(int release_caches) {
-  heap_t *heap = get_thread_heap_raw();
-  if (heap)
-    _rpmalloc_heap_release_raw(heap, release_caches);
-  set_thread_heap(0);
-#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
-  FlsSetValue(fls_key, 0);
-#endif
-}
-
-int rpmalloc_is_thread_initialized(void) {
-  return (get_thread_heap_raw() != 0) ? 1 : 0;
-}
-
-const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; }
-
-// Extern interface
-
-extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
-#if ENABLE_VALIDATE_ARGS
-  if (size >= MAX_ALLOC_SIZE) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  heap_t *heap = get_thread_heap();
-  return _rpmalloc_allocate(heap, size);
-}
-
-extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
-
-extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) {
-  size_t total;
-#if ENABLE_VALIDATE_ARGS
-#if PLATFORM_WINDOWS
-  int err = SizeTMult(num, size, &total);
-  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#else
-  int err = __builtin_umull_overflow(num, size, &total);
-  if (err || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-#else
-  total = num * size;
-#endif
-  heap_t *heap = get_thread_heap();
-  void *block = _rpmalloc_allocate(heap, total);
-  if (block)
-    memset(block, 0, total);
-  return block;
-}
-
-extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) {
-#if ENABLE_VALIDATE_ARGS
-  if (size >= MAX_ALLOC_SIZE) {
-    errno = EINVAL;
-    return ptr;
-  }
-#endif
-  heap_t *heap = get_thread_heap();
-  return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
-}
-
-extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment,
-                                                  size_t size, size_t oldsize,
-                                                  unsigned int flags) {
-#if ENABLE_VALIDATE_ARGS
-  if ((size + alignment < size) || (alignment > _memory_page_size)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  heap_t *heap = get_thread_heap();
-  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
-                                      flags);
-}
-
-extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) {
-  heap_t *heap = get_thread_heap();
-  return _rpmalloc_aligned_allocate(heap, alignment, size);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpaligned_calloc(size_t alignment, size_t num, size_t size) {
-  size_t total;
-#if ENABLE_VALIDATE_ARGS
-#if PLATFORM_WINDOWS
-  int err = SizeTMult(num, size, &total);
-  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#else
-  int err = __builtin_umull_overflow(num, size, &total);
-  if (err || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-#else
-  total = num * size;
-#endif
-  void *block = rpaligned_alloc(alignment, total);
-  if (block)
-    memset(block, 0, total);
-  return block;
-}
-
-extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment,
-                                                  size_t size) {
-  return rpaligned_alloc(alignment, size);
-}
-
-extern inline int rpposix_memalign(void **memptr, size_t alignment,
-                                   size_t size) {
-  if (memptr)
-    *memptr = rpaligned_alloc(alignment, size);
-  else
-    return EINVAL;
-  return *memptr ? 0 : ENOMEM;
-}
-
-extern inline size_t rpmalloc_usable_size(void *ptr) {
-  return (ptr ? _rpmalloc_usable_size(ptr) : 0);
-}
-
-extern inline void rpmalloc_thread_collect(void) {}
-
-void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
-  memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
-  heap_t *heap = get_thread_heap_raw();
-  if (!heap)
-    return;
-
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    size_class_t *size_class = _memory_size_class + iclass;
-    span_t *span = heap->size_class[iclass].partial_span;
-    while (span) {
-      size_t free_count = span->list_size;
-      size_t block_count = size_class->block_count;
-      if (span->free_list_limit < block_count)
-        block_count = span->free_list_limit;
-      free_count += (block_count - span->used_count);
-      stats->sizecache += free_count * size_class->block_size;
-      span = span->next;
-    }
-  }
-
-#if ENABLE_THREAD_CACHE
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    span_cache_t *span_cache;
-    if (!iclass)
-      span_cache = &heap->span_cache;
-    else
-      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
-    stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
-  }
-#endif
-
-  span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
-  while (deferred) {
-    if (deferred->size_class != SIZE_CLASS_HUGE)
-      stats->spancache += (size_t)deferred->span_count * _memory_span_size;
-    deferred = (span_t *)deferred->free_list;
-  }
-
-#if ENABLE_STATISTICS
-  stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
-  stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
-
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    stats->span_use[iclass].current =
-        (size_t)atomic_load32(&heap->span_use[iclass].current);
-    stats->span_use[iclass].peak =
-        (size_t)atomic_load32(&heap->span_use[iclass].high);
-    stats->span_use[iclass].to_global =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
-    stats->span_use[iclass].from_global =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
-    stats->span_use[iclass].to_cache =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
-    stats->span_use[iclass].from_cache =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
-    stats->span_use[iclass].to_reserved =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
-    stats->span_use[iclass].from_reserved =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
-    stats->span_use[iclass].map_calls =
-        (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
-  }
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    stats->size_use[iclass].alloc_current =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
-    stats->size_use[iclass].alloc_peak =
-        (size_t)heap->size_class_use[iclass].alloc_peak;
-    stats->size_use[iclass].alloc_total =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
-    stats->size_use[iclass].free_total =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
-    stats->size_use[iclass].spans_to_cache =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
-    stats->size_use[iclass].spans_from_cache =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
-    stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(
-        &heap->size_class_use[iclass].spans_from_reserved);
-    stats->size_use[iclass].map_calls =
-        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
-  }
-#endif
-}
-
-void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
-  memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
-#if ENABLE_STATISTICS
-  stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
-  stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
-  stats->mapped_total =
-      (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
-  stats->unmapped_total =
-      (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
-  stats->huge_alloc =
-      (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
-  stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
-#endif
-#if ENABLE_GLOBAL_CACHE
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    global_cache_t *cache = &_memory_span_cache[iclass];
-    while (!atomic_cas32_acquire(&cache->lock, 1, 0))
-      _rpmalloc_spin();
-    uint32_t count = cache->count;
-#if ENABLE_UNLIMITED_CACHE
-    span_t *current_span = cache->overflow;
-    while (current_span) {
-      ++count;
-      current_span = current_span->next;
-    }
-#endif
-    atomic_store32_release(&cache->lock, 0);
-    stats->cached += count * (iclass + 1) * _memory_span_size;
-  }
-#endif
-}
-
-#if ENABLE_STATISTICS
-
-static void _memory_heap_dump_statistics(heap_t *heap, void *file) {
-  fprintf(file, "Heap %d stats:\n", heap->id);
-  fprintf(file, "Class   CurAlloc  PeakAlloc   TotAlloc    TotFree  BlkSize "
-                "BlkCount SpansCur SpansPeak  PeakAllocMiB  ToCacheMiB "
-                "FromCacheMiB FromReserveMiB MmapCalls\n");
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
-      continue;
-    fprintf(
-        file,
-        "%3u:  %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu "
-        "%9u\n",
-        (uint32_t)iclass,
-        atomic_load32(&heap->size_class_use[iclass].alloc_current),
-        heap->size_class_use[iclass].alloc_peak,
-        atomic_load32(&heap->size_class_use[iclass].alloc_total),
-        atomic_load32(&heap->size_class_use[iclass].free_total),
-        _memory_size_class[iclass].block_size,
-        _memory_size_class[iclass].block_count,
-        atomic_load32(&heap->size_class_use[iclass].spans_current),
-        heap->size_class_use[iclass].spans_peak,
-        ((size_t)heap->size_class_use[iclass].alloc_peak *
-         (size_t)_memory_size_class[iclass].block_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) *
-         _memory_span_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) *
-         _memory_span_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(
-             &heap->size_class_use[iclass].spans_from_reserved) *
-         _memory_span_size) /
-            (size_t)(1024 * 1024),
-        atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
-  }
-  fprintf(file, "Spans  Current     Peak Deferred  PeakMiB  Cached  ToCacheMiB "
-                "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB "
-                "FromGlobalMiB  MmapCalls\n");
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    if (!atomic_load32(&heap->span_use[iclass].high) &&
-        !atomic_load32(&heap->span_use[iclass].spans_map_calls))
-      continue;
-    fprintf(
-        file,
-        "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n",
-        (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current),
-        atomic_load32(&heap->span_use[iclass].high),
-        atomic_load32(&heap->span_use[iclass].spans_deferred),
-        ((size_t)atomic_load32(&heap->span_use[iclass].high) *
-         (size_t)_memory_span_size * (iclass + 1)) /
-            (size_t)(1024 * 1024),
-#if ENABLE_THREAD_CACHE
-        (unsigned int)(!iclass ? heap->span_cache.count
-                               : heap->span_large_cache[iclass - 1].count),
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) *
-         (iclass + 1) * _memory_span_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) *
-         (iclass + 1) * _memory_span_size) /
-            (size_t)(1024 * 1024),
-#else
-        0, (size_t)0, (size_t)0,
-#endif
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) *
-         (iclass + 1) * _memory_span_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) *
-         (iclass + 1) * _memory_span_size) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) *
-         (size_t)_memory_span_size * (iclass + 1)) /
-            (size_t)(1024 * 1024),
-        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) *
-         (size_t)_memory_span_size * (iclass + 1)) /
-            (size_t)(1024 * 1024),
-        atomic_load32(&heap->span_use[iclass].spans_map_calls));
-  }
-  fprintf(file, "Full spans: %zu\n", heap->full_span_count);
-  fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
-  fprintf(
-      file, "%17zu %17zu\n",
-      (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
-      (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
-}
-
-#endif
-
-void rpmalloc_dump_statistics(void *file) {
-#if ENABLE_STATISTICS
-  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
-    heap_t *heap = _memory_heaps[list_idx];
-    while (heap) {
-      int need_dump = 0;
-      for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT);
-           ++iclass) {
-        if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
-          rpmalloc_assert(
-              !atomic_load32(&heap->size_class_use[iclass].free_total),
-              "Heap statistics counter mismatch");
-          rpmalloc_assert(
-              !atomic_load32(&heap->size_class_use[iclass].spans_map_calls),
-              "Heap statistics counter mismatch");
-          continue;
-        }
-        need_dump = 1;
-      }
-      for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT);
-           ++iclass) {
-        if (!atomic_load32(&heap->span_use[iclass].high) &&
-            !atomic_load32(&heap->span_use[iclass].spans_map_calls))
-          continue;
-        need_dump = 1;
-      }
-      if (need_dump)
-        _memory_heap_dump_statistics(heap, file);
-      heap = heap->next_heap;
-    }
-  }
-  fprintf(file, "Global stats:\n");
-  size_t huge_current =
-      (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
-  size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
-  fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
-  fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024),
-          huge_peak / (size_t)(1024 * 1024));
-
-#if ENABLE_GLOBAL_CACHE
-  fprintf(file, "GlobalCacheMiB\n");
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    global_cache_t *cache = _memory_span_cache + iclass;
-    size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
-
-    size_t global_overflow_cache = 0;
-    span_t *span = cache->overflow;
-    while (span) {
-      global_overflow_cache += iclass * _memory_span_size;
-      span = span->next;
-    }
-    if (global_cache || global_overflow_cache || cache->insert_count ||
-        cache->extract_count)
-      fprintf(file,
-              "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n",
-              iclass + 1, global_cache / (size_t)(1024 * 1024),
-              global_overflow_cache / (size_t)(1024 * 1024),
-              cache->insert_count, cache->extract_count);
-  }
-#endif
-
-  size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
-  size_t mapped_os =
-      (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
-  size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
-  size_t mapped_total =
-      (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
-  size_t unmapped_total =
-      (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
-  fprintf(
-      file,
-      "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
-  fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
-          mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024),
-          mapped_peak / (size_t)(1024 * 1024),
-          mapped_total / (size_t)(1024 * 1024),
-          unmapped_total / (size_t)(1024 * 1024));
-
-  fprintf(file, "\n");
-#if 0
-	int64_t allocated = atomic_load64(&_allocation_counter);
-	int64_t deallocated = atomic_load64(&_deallocation_counter);
-	fprintf(file, "Allocation count: %lli\n", allocated);
-	fprintf(file, "Deallocation count: %lli\n", deallocated);
-	fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
-	fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
-	fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
-#endif
-#endif
-  (void)sizeof(file);
-}
-
-#if RPMALLOC_FIRST_CLASS_HEAPS
-
-extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
-  // Must be a pristine heap from newly mapped memory pages, or else memory
-  // blocks could already be allocated from the heap which would (wrongly) be
-  // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
-  // guaranteed to be pristine from the dedicated orphan list can be used.
-  heap_t *heap = _rpmalloc_heap_allocate(1);
-  rpmalloc_assume(heap != NULL);
-  heap->owner_thread = 0;
-  _rpmalloc_stat_inc(&_memory_active_heaps);
-  return heap;
-}
-
-extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
-  if (heap)
-    _rpmalloc_heap_release(heap, 1, 1);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
-#if ENABLE_VALIDATE_ARGS
-  if (size >= MAX_ALLOC_SIZE) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  return _rpmalloc_allocate(heap, size);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
-                            size_t size) {
-#if ENABLE_VALIDATE_ARGS
-  if (size >= MAX_ALLOC_SIZE) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  return _rpmalloc_aligned_allocate(heap, alignment, size);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
-  return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
-                             size_t num, size_t size) {
-  size_t total;
-#if ENABLE_VALIDATE_ARGS
-#if PLATFORM_WINDOWS
-  int err = SizeTMult(num, size, &total);
-  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#else
-  int err = __builtin_umull_overflow(num, size, &total);
-  if (err || (total >= MAX_ALLOC_SIZE)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-#else
-  total = num * size;
-#endif
-  void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
-  if (block)
-    memset(block, 0, total);
-  return block;
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
-                      unsigned int flags) {
-#if ENABLE_VALIDATE_ARGS
-  if (size >= MAX_ALLOC_SIZE) {
-    errno = EINVAL;
-    return ptr;
-  }
-#endif
-  return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
-}
-
-extern inline RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
-                              size_t alignment, size_t size,
-                              unsigned int flags) {
-#if ENABLE_VALIDATE_ARGS
-  if ((size + alignment < size) || (alignment > _memory_page_size)) {
-    errno = EINVAL;
-    return 0;
-  }
-#endif
-  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
-}
-
-extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
-  (void)sizeof(heap);
-  _rpmalloc_deallocate(ptr);
-}
-
-extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
-  span_t *span;
-  span_t *next_span;
-
-  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
-
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    span = heap->size_class[iclass].partial_span;
-    while (span) {
-      next_span = span->next;
-      _rpmalloc_heap_cache_insert(heap, span);
-      span = next_span;
-    }
-    heap->size_class[iclass].partial_span = 0;
-    span = heap->full_span[iclass];
-    while (span) {
-      next_span = span->next;
-      _rpmalloc_heap_cache_insert(heap, span);
-      span = next_span;
-    }
-
-    span = heap->size_class[iclass].cache;
-    if (span)
-      _rpmalloc_heap_cache_insert(heap, span);
-    heap->size_class[iclass].cache = 0;
-  }
-  memset(heap->size_class, 0, sizeof(heap->size_class));
-  memset(heap->full_span, 0, sizeof(heap->full_span));
-
-  span = heap->large_huge_span;
-  while (span) {
-    next_span = span->next;
-    if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
-      _rpmalloc_deallocate_huge(span);
-    else
-      _rpmalloc_heap_cache_insert(heap, span);
-    span = next_span;
-  }
-  heap->large_huge_span = 0;
-  heap->full_span_count = 0;
-
-#if ENABLE_THREAD_CACHE
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    span_cache_t *span_cache;
-    if (!iclass)
-      span_cache = &heap->span_cache;
-    else
-      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
-    if (!span_cache->count)
-      continue;
-#if ENABLE_GLOBAL_CACHE
-    _rpmalloc_stat_add64(&heap->thread_to_global,
-                         span_cache->count * (iclass + 1) * _memory_span_size);
-    _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
-                       span_cache->count);
-    _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
-                                        span_cache->count);
-#else
-    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
-      _rpmalloc_span_unmap(span_cache->span[ispan]);
-#endif
-    span_cache->count = 0;
-  }
-#endif
-
-#if ENABLE_STATISTICS
-  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
-    atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
-    atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
-  }
-  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
-    atomic_store32(&heap->span_use[iclass].current, 0);
-  }
-#endif
-}
-
-extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
-  heap_t *prev_heap = get_thread_heap_raw();
-  if (prev_heap != heap) {
-    set_thread_heap(heap);
-    if (prev_heap)
-      rpmalloc_heap_release(prev_heap);
-  }
-}
-
-extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
-  // Grab the span, and then the heap from the span
-  span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
-  if (span) {
-    return span->heap;
-  }
-  return 0;
-}
-
-#endif
-
-#if ENABLE_PRELOAD || ENABLE_OVERRIDE
-
-#include "malloc.c"
-
-#endif
-
-void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); }
+//===---------------------- rpmalloc.c ------------------*- C -*-=============//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This library provides a cross-platform lock free thread caching malloc
+// implementation in C11.
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpmalloc.h"
+
+////////////
+///
+/// Build time configurable limits
+///
+//////
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-macros"
+#pragma clang diagnostic ignored "-Wunused-function"
+#if __has_warning("-Wreserved-identifier")
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+#if __has_warning("-Wstatic-in-inline")
+#pragma clang diagnostic ignored "-Wstatic-in-inline"
+#endif
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if !defined(__has_builtin)
+#define __has_builtin(b) 0
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+
+#if __has_builtin(__builtin_memcpy_inline)
+#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
+#else
+#define _rpmalloc_memcpy_const(x, y, s)                                        \
+  do {                                                                         \
+    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0),       \
+                   "len must be a constant integer");                          \
+    memcpy(x, y, s);                                                           \
+  } while (0)
+#endif
+
+#if __has_builtin(__builtin_memset_inline)
+#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
+#else
+#define _rpmalloc_memset_const(x, y, s)                                        \
+  do {                                                                         \
+    _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0),       \
+                   "len must be a constant integer");                          \
+    memset(x, y, s);                                                           \
+  } while (0)
+#endif
+#else
+#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
+#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
+#endif
+
+#if __has_builtin(__builtin_assume)
+#define rpmalloc_assume(cond) __builtin_assume(cond)
+#elif defined(__GNUC__)
+#define rpmalloc_assume(cond)                                                  \
+  do {                                                                         \
+    if (!__builtin_expect(cond, 0))                                            \
+      __builtin_unreachable();                                                 \
+  } while (0)
+#elif defined(_MSC_VER)
+#define rpmalloc_assume(cond) __assume(cond)
+#else
+#define rpmalloc_assume(cond) 0
+#endif
+
+#ifndef HEAP_ARRAY_SIZE
+//! Size of heap hashmap
+#define HEAP_ARRAY_SIZE 47
+#endif
+#ifndef ENABLE_THREAD_CACHE
+//! Enable per-thread cache
+#define ENABLE_THREAD_CACHE 1
+#endif
+#ifndef ENABLE_GLOBAL_CACHE
+//! Enable global cache shared between all threads, requires thread cache
+#define ENABLE_GLOBAL_CACHE 1
+#endif
+#ifndef ENABLE_VALIDATE_ARGS
+//! Enable validation of args to public entry points
+#define ENABLE_VALIDATE_ARGS 0
+#endif
+#ifndef ENABLE_STATISTICS
+//! Enable statistics collection
+#define ENABLE_STATISTICS 0
+#endif
+#ifndef ENABLE_ASSERTS
+//! Enable asserts
+#define ENABLE_ASSERTS 0
+#endif
+#ifndef ENABLE_OVERRIDE
+//! Override standard library malloc/free and new/delete entry points
+#define ENABLE_OVERRIDE 0
+#endif
+#ifndef ENABLE_PRELOAD
+//! Support preloading
+#define ENABLE_PRELOAD 0
+#endif
+#ifndef DISABLE_UNMAP
+//! Disable unmapping memory pages (also enables unlimited cache)
+#define DISABLE_UNMAP 0
+#endif
+#ifndef ENABLE_UNLIMITED_CACHE
+//! Enable unlimited global cache (no unmapping until finalization)
+#define ENABLE_UNLIMITED_CACHE 0
+#endif
+#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
+//! Enable adaptive thread cache size based on use heuristics
+#define ENABLE_ADAPTIVE_THREAD_CACHE 0
+#endif
+#ifndef DEFAULT_SPAN_MAP_COUNT
+//! Default number of spans to map in call to map more virtual memory (default
+//! values yield 4MiB here)
+#define DEFAULT_SPAN_MAP_COUNT 64
+#endif
+#ifndef GLOBAL_CACHE_MULTIPLIER
+//! Multiplier for global cache
+#define GLOBAL_CACHE_MULTIPLIER 8
+#endif
+
+#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
+#error Must use global cache if unmap is disabled
+#endif
+
+#if DISABLE_UNMAP
+#undef ENABLE_UNLIMITED_CACHE
+#define ENABLE_UNLIMITED_CACHE 1
+#endif
+
+#if !ENABLE_GLOBAL_CACHE
+#undef ENABLE_UNLIMITED_CACHE
+#define ENABLE_UNLIMITED_CACHE 0
+#endif
+
+#if !ENABLE_THREAD_CACHE
+#undef ENABLE_ADAPTIVE_THREAD_CACHE
+#define ENABLE_ADAPTIVE_THREAD_CACHE 0
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
+#define PLATFORM_WINDOWS 1
+#define PLATFORM_POSIX 0
+#else
+#define PLATFORM_WINDOWS 0
+#define PLATFORM_POSIX 1
+#endif
+
+/// Platform and arch specifics
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(disable : 5105)
+#ifndef FORCEINLINE
+#define FORCEINLINE inline __forceinline
+#endif
+#define _Static_assert static_assert
+#else
+#ifndef FORCEINLINE
+#define FORCEINLINE inline __attribute__((__always_inline__))
+#endif
+#endif
+#if PLATFORM_WINDOWS
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#if ENABLE_VALIDATE_ARGS
+#include <intsafe.h>
+#endif
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#if defined(__linux__) || defined(__ANDROID__)
+#include <sys/prctl.h>
+#if !defined(PR_SET_VMA)
+#define PR_SET_VMA 0x53564d41
+#define PR_SET_VMA_ANON_NAME 0
+#endif
+#endif
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+#include <mach/mach_vm.h>
+#include <mach/vm_statistics.h>
+#endif
+#include <pthread.h>
+#endif
+#if defined(__HAIKU__) || defined(__TINYC__)
+#include <pthread.h>
+#endif
+#endif
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+#include <fibersapi.h>
+static DWORD fls_key;
+#endif
+
+#if PLATFORM_POSIX
+#include <sched.h>
+#include <sys/mman.h>
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#define MAP_HUGETLB MAP_ALIGNED_SUPER
+#ifndef PROT_MAX
+#define PROT_MAX(f) 0
+#endif
+#else
+#define PROT_MAX(f) 0
+#endif
+#ifdef __sun
+extern int madvise(caddr_t, size_t, int);
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+#endif
+#include <errno.h>
+
+#if ENABLE_ASSERTS
+#undef NDEBUG
+#if defined(_MSC_VER) && !defined(_DEBUG)
+#define _DEBUG
+#endif
+#include <assert.h>
+#define RPMALLOC_TOSTRING_M(x) #x
+#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
+#define rpmalloc_assert(truth, message)                                        \
+  do {                                                                         \
+    if (!(truth)) {                                                            \
+      if (_memory_config.error_callback) {                                     \
+        _memory_config.error_callback(message " (" RPMALLOC_TOSTRING(          \
+            truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__));          \
+      } else {                                                                 \
+        assert((truth) && message);                                            \
+      }                                                                        \
+    }                                                                          \
+  } while (0)
+#else
+#define rpmalloc_assert(truth, message)                                        \
+  do {                                                                         \
+  } while (0)
+#endif
+#if ENABLE_STATISTICS
+#include <stdio.h>
+#endif
+
+//////
+///
+/// Atomic access abstraction (since MSVC does not do C11 yet)
+///
+//////
+
+#if defined(_MSC_VER) && !defined(__clang__)
+
+typedef volatile long atomic32_t;
+typedef volatile long long atomic64_t;
+typedef volatile void *atomicptr_t;
+
+static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
+static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
+  *dst = val;
+}
+static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
+  return (int32_t)InterlockedIncrement(val);
+}
+static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
+  return (int32_t)InterlockedDecrement(val);
+}
+static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
+  return (int32_t)InterlockedExchangeAdd(val, add) + add;
+}
+static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
+                                            int32_t ref) {
+  return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0;
+}
+static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
+  *dst = val;
+}
+static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
+static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
+  return (int64_t)InterlockedExchangeAdd64(val, add) + add;
+}
+static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
+  return (void *)*src;
+}
+static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
+  *dst = val;
+}
+static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
+  *dst = val;
+}
+static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
+                                                     void *val) {
+  return (void *)InterlockedExchangePointer((void *volatile *)dst, val);
+}
+static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
+  return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) ==
+          ref)
+             ? 1
+             : 0;
+}
+
+#define EXPECTED(x) (x)
+#define UNEXPECTED(x) (x)
+
+#else
+
+#include <stdatomic.h>
+
+typedef volatile _Atomic(int32_t) atomic32_t;
+typedef volatile _Atomic(int64_t) atomic64_t;
+typedef volatile _Atomic(void *) atomicptr_t;
+
+static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
+  return atomic_load_explicit(src, memory_order_relaxed);
+}
+static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
+  atomic_store_explicit(dst, val, memory_order_relaxed);
+}
+static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
+  return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1;
+}
+static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
+  return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1;
+}
+static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
+  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
+}
+static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
+                                            int32_t ref) {
+  return atomic_compare_exchange_weak_explicit(
+      dst, &ref, val, memory_order_acquire, memory_order_relaxed);
+}
+static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
+  atomic_store_explicit(dst, val, memory_order_release);
+}
+static FORCEINLINE int64_t atomic_load64(atomic64_t *val) {
+  return atomic_load_explicit(val, memory_order_relaxed);
+}
+static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
+  return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
+}
+static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
+  return atomic_load_explicit(src, memory_order_relaxed);
+}
+static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
+  atomic_store_explicit(dst, val, memory_order_relaxed);
+}
+static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
+  atomic_store_explicit(dst, val, memory_order_release);
+}
+static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
+                                                     void *val) {
+  return atomic_exchange_explicit(dst, val, memory_order_acquire);
+}
+static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
+  return atomic_compare_exchange_weak_explicit(
+      dst, &ref, val, memory_order_relaxed, memory_order_relaxed);
+}
+
+#define EXPECTED(x) __builtin_expect((x), 1)
+#define UNEXPECTED(x) __builtin_expect((x), 0)
+
+#endif
+
+////////////
+///
+/// Statistics related functions (evaluate to nothing when statistics not
+/// enabled)
+///
+//////
+
+#if ENABLE_STATISTICS
+#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
+#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
+#define _rpmalloc_stat_add(counter, value)                                     \
+  atomic_add32(counter, (int32_t)(value))
+#define _rpmalloc_stat_add64(counter, value)                                   \
+  atomic_add64(counter, (int64_t)(value))
+#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
+  do {                                                                         \
+    int32_t _cur_count = atomic_add32(counter, (int32_t)(value));              \
+    if (_cur_count > (peak))                                                   \
+      peak = _cur_count;                                                       \
+  } while (0)
+#define _rpmalloc_stat_sub(counter, value)                                     \
+  atomic_add32(counter, -(int32_t)(value))
+#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
+  do {                                                                         \
+    int32_t alloc_current =                                                    \
+        atomic_incr32(&heap->size_class_use[class_idx].alloc_current);         \
+    if (alloc_current > heap->size_class_use[class_idx].alloc_peak)            \
+      heap->size_class_use[class_idx].alloc_peak = alloc_current;              \
+    atomic_incr32(&heap->size_class_use[class_idx].alloc_total);               \
+  } while (0)
+#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
+  do {                                                                         \
+    atomic_decr32(&heap->size_class_use[class_idx].alloc_current);             \
+    atomic_incr32(&heap->size_class_use[class_idx].free_total);                \
+  } while (0)
+#else
+#define _rpmalloc_stat_inc(counter)                                            \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_dec(counter)                                            \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_add(counter, value)                                     \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_add64(counter, value)                                   \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_sub(counter, value)                                     \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
+  do {                                                                         \
+  } while (0)
+#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
+  do {                                                                         \
+  } while (0)
+#endif
+
+///
+/// Preconfigured limits and sizes
+///
+
+//! Granularity of a small allocation block (must be power of two)
+#define SMALL_GRANULARITY 16
+//! Small granularity shift count
+#define SMALL_GRANULARITY_SHIFT 4
+//! Number of small block size classes
+#define SMALL_CLASS_COUNT 65
+//! Maximum size of a small block
+#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
+//! Granularity of a medium allocation block
+#define MEDIUM_GRANULARITY 512
+//! Medium granularity shift count
+#define MEDIUM_GRANULARITY_SHIFT 9
+//! Number of medium block size classes
+#define MEDIUM_CLASS_COUNT 61
+//! Total number of small + medium size classes
+#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
+//! Number of large block size classes
+#define LARGE_CLASS_COUNT 63
+//! Maximum size of a medium block
+#define MEDIUM_SIZE_LIMIT                                                      \
+  (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
+//! Maximum size of a large block
+#define LARGE_SIZE_LIMIT                                                       \
+  ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
+//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power
+//! of two)
+#define SPAN_HEADER_SIZE 128
+//! Number of spans in thread cache
+#define MAX_THREAD_SPAN_CACHE 400
+//! Number of spans to transfer between thread and global cache
+#define THREAD_SPAN_CACHE_TRANSFER 64
+//! Number of spans in thread cache for large spans (must be greater than
+//! LARGE_CLASS_COUNT / 2)
+#define MAX_THREAD_SPAN_LARGE_CACHE 100
+//! Number of spans to transfer between thread and global cache for large spans
+#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
+
+_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0,
+               "Small granularity must be power of two");
+_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0,
+               "Span header size must be power of two");
+
+#if ENABLE_VALIDATE_ARGS
+//! Maximum allocation size to avoid integer overflow
+#undef MAX_ALLOC_SIZE
+#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size)
+#endif
+
+#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs))
+#define pointer_diff(first, second)                                            \
+  (ptrdiff_t)((const char *)(first) - (const char *)(second))
+
+#define INVALID_POINTER ((void *)((uintptr_t) - 1))
+
+#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
+#define SIZE_CLASS_HUGE ((uint32_t) - 1)
+
+////////////
+///
+/// Data types
+///
+//////
+
+//! A memory heap, per thread
+typedef struct heap_t heap_t;
+//! Span of memory pages
+typedef struct span_t span_t;
+//! Span list
+typedef struct span_list_t span_list_t;
+//! Span active data
+typedef struct span_active_t span_active_t;
+//! Size class definition
+typedef struct size_class_t size_class_t;
+//! Global cache
+typedef struct global_cache_t global_cache_t;
+
+//! Flag indicating span is the first (master) span of a split superspan
+#define SPAN_FLAG_MASTER 1U
+//! Flag indicating span is a secondary (sub) span of a split superspan
+#define SPAN_FLAG_SUBSPAN 2U
+//! Flag indicating span has blocks with increased alignment
+#define SPAN_FLAG_ALIGNED_BLOCKS 4U
+//! Flag indicating an unmapped master span
+#define SPAN_FLAG_UNMAPPED_MASTER 8U
+
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+struct span_use_t {
+  //! Current number of spans used (actually used, not in cache)
+  atomic32_t current;
+  //! High water mark of spans used
+  atomic32_t high;
+#if ENABLE_STATISTICS
+  //! Number of spans in deferred list
+  atomic32_t spans_deferred;
+  //! Number of spans transitioned to global cache
+  atomic32_t spans_to_global;
+  //! Number of spans transitioned from global cache
+  atomic32_t spans_from_global;
+  //! Number of spans transitioned to thread cache
+  atomic32_t spans_to_cache;
+  //! Number of spans transitioned from thread cache
+  atomic32_t spans_from_cache;
+  //! Number of spans transitioned to reserved state
+  atomic32_t spans_to_reserved;
+  //! Number of spans transitioned from reserved state
+  atomic32_t spans_from_reserved;
+  //! Number of raw memory map calls
+  atomic32_t spans_map_calls;
+#endif
+};
+typedef struct span_use_t span_use_t;
+#endif
+
+#if ENABLE_STATISTICS
+struct size_class_use_t {
+  //! Current number of allocations
+  atomic32_t alloc_current;
+  //! Peak number of allocations
+  int32_t alloc_peak;
+  //! Total number of allocations
+  atomic32_t alloc_total;
+  //! Total number of frees
+  atomic32_t free_total;
+  //! Number of spans in use
+  atomic32_t spans_current;
+  //! Number of spans transitioned to cache
+  int32_t spans_peak;
+  //! Number of spans transitioned to cache
+  atomic32_t spans_to_cache;
+  //! Number of spans transitioned from cache
+  atomic32_t spans_from_cache;
+  //! Number of spans transitioned from reserved state
+  atomic32_t spans_from_reserved;
+  //! Number of spans mapped
+  atomic32_t spans_map_calls;
+  int32_t unused;
+};
+typedef struct size_class_use_t size_class_use_t;
+#endif
+
+// A span can either represent a single span of memory pages with size declared
+// by span_map_count configuration variable, or a set of spans in a continuous
+// region, a super span. Any reference to the term "span" usually refers to both
+// a single span or a super span. A super span can further be divided into
+// multiple spans (or this, super spans), where the first (super)span is the
+// master and subsequent (super)spans are subspans. The master span keeps track
+// of how many subspans that are still alive and mapped in virtual memory, and
+// once all subspans and master have been unmapped the entire superspan region
+// is released and unmapped (on Windows for example, the entire superspan range
+// has to be released in the same call to release the virtual memory range, but
+// individual subranges can be decommitted individually to reduce physical
+// memory use).
+struct span_t {
+  //! Free list
+  void *free_list;
+  //! Total block count of size class
+  uint32_t block_count;
+  //! Size class
+  uint32_t size_class;
+  //! Index of last block initialized in free list
+  uint32_t free_list_limit;
+  //! Number of used blocks remaining when in partial state
+  uint32_t used_count;
+  //! Deferred free list
+  atomicptr_t free_list_deferred;
+  //! Size of deferred free list, or list of spans when part of a cache list
+  uint32_t list_size;
+  //! Size of a block
+  uint32_t block_size;
+  //! Flags and counters
+  uint32_t flags;
+  //! Number of spans
+  uint32_t span_count;
+  //! Total span counter for master spans
+  uint32_t total_spans;
+  //! Offset from master span for subspans
+  uint32_t offset_from_master;
+  //! Remaining span counter, for master spans
+  atomic32_t remaining_spans;
+  //! Alignment offset
+  uint32_t align_offset;
+  //! Owning heap
+  heap_t *heap;
+  //! Next span
+  span_t *next;
+  //! Previous span
+  span_t *prev;
+};
+_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
+
+struct span_cache_t {
+  size_t count;
+  span_t *span[MAX_THREAD_SPAN_CACHE];
+};
+typedef struct span_cache_t span_cache_t;
+
+struct span_large_cache_t {
+  size_t count;
+  span_t *span[MAX_THREAD_SPAN_LARGE_CACHE];
+};
+typedef struct span_large_cache_t span_large_cache_t;
+
+struct heap_size_class_t {
+  //! Free list of active span
+  void *free_list;
+  //! Double linked list of partially used spans with free blocks.
+  //  Previous span pointer in head points to tail span of list.
+  span_t *partial_span;
+  //! Early level cache of fully free spans
+  span_t *cache;
+};
+typedef struct heap_size_class_t heap_size_class_t;
+
+// Control structure for a heap, either a thread heap or a first class heap if
+// enabled
+struct heap_t {
+  //! Owning thread ID
+  uintptr_t owner_thread;
+  //! Free lists for each size class
+  heap_size_class_t size_class[SIZE_CLASS_COUNT];
+#if ENABLE_THREAD_CACHE
+  //! Arrays of fully freed spans, single span
+  span_cache_t span_cache;
+#endif
+  //! List of deferred free spans (single linked list)
+  atomicptr_t span_free_deferred;
+  //! Number of full spans
+  size_t full_span_count;
+  //! Mapped but unused spans
+  span_t *span_reserve;
+  //! Master span for mapped but unused spans
+  span_t *span_reserve_master;
+  //! Number of mapped but unused spans
+  uint32_t spans_reserved;
+  //! Child count
+  atomic32_t child_count;
+  //! Next heap in id list
+  heap_t *next_heap;
+  //! Next heap in orphan list
+  heap_t *next_orphan;
+  //! Heap ID
+  int32_t id;
+  //! Finalization state flag
+  int finalize;
+  //! Master heap owning the memory pages
+  heap_t *master_heap;
+#if ENABLE_THREAD_CACHE
+  //! Arrays of fully freed spans, large spans with > 1 span count
+  span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
+#endif
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  //! Double linked list of fully utilized spans with free blocks for each size
+  //! class.
+  //  Previous span pointer in head points to tail span of list.
+  span_t *full_span[SIZE_CLASS_COUNT];
+  //! Double linked list of large and huge spans allocated by this heap
+  span_t *large_huge_span;
+#endif
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+  //! Current and high water mark of spans used per span count
+  span_use_t span_use[LARGE_CLASS_COUNT];
+#endif
+#if ENABLE_STATISTICS
+  //! Allocation stats per size class
+  size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
+  //! Number of bytes transitioned thread -> global
+  atomic64_t thread_to_global;
+  //! Number of bytes transitioned global -> thread
+  atomic64_t global_to_thread;
+#endif
+};
+
+// Size class for defining a block size bucket
+struct size_class_t {
+  //! Size of blocks in this class
+  uint32_t block_size;
+  //! Number of blocks in each chunk
+  uint16_t block_count;
+  //! Class index this class is merged with
+  uint16_t class_idx;
+};
+_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
+
+struct global_cache_t {
+  //! Cache lock
+  atomic32_t lock;
+  //! Cache count
+  uint32_t count;
+#if ENABLE_STATISTICS
+  //! Insert count
+  size_t insert_count;
+  //! Extract count
+  size_t extract_count;
+#endif
+  //! Cached spans
+  span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
+  //! Unlimited cache overflow
+  span_t *overflow;
+};
+
+////////////
+///
+/// Global data
+///
+//////
+
+//! Default span size (64KiB)
+#define _memory_default_span_size (64 * 1024)
+#define _memory_default_span_size_shift 16
+#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
+
+//! Initialized flag
+static int _rpmalloc_initialized;
+//! Main thread ID
+static uintptr_t _rpmalloc_main_thread_id;
+//! Configuration
+static rpmalloc_config_t _memory_config;
+//! Memory page size
+static size_t _memory_page_size;
+//! Shift to divide by page size
+static size_t _memory_page_size_shift;
+//! Granularity at which memory pages are mapped by OS
+static size_t _memory_map_granularity;
+#if RPMALLOC_CONFIGURABLE
+//! Size of a span of memory pages
+static size_t _memory_span_size;
+//! Shift to divide by span size
+static size_t _memory_span_size_shift;
+//! Mask to get to start of a memory span
+static uintptr_t _memory_span_mask;
+#else
+//! Hardwired span size
+#define _memory_span_size _memory_default_span_size
+#define _memory_span_size_shift _memory_default_span_size_shift
+#define _memory_span_mask _memory_default_span_mask
+#endif
+//! Number of spans to map in each map call
+static size_t _memory_span_map_count;
+//! Number of spans to keep reserved in each heap
+static size_t _memory_heap_reserve_count;
+//! Global size classes
+static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
+//! Run-time size limit of medium blocks
+static size_t _memory_medium_size_limit;
+//! Heap ID counter
+static atomic32_t _memory_heap_id;
+//! Huge page support
+static int _memory_huge_pages;
+#if ENABLE_GLOBAL_CACHE
+//! Global span cache
+static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
+#endif
+//! Global reserved spans
+static span_t *_memory_global_reserve;
+//! Global reserved count
+static size_t _memory_global_reserve_count;
+//! Global reserved master
+static span_t *_memory_global_reserve_master;
+//! All heaps
+static heap_t *_memory_heaps[HEAP_ARRAY_SIZE];
+//! Used to restrict access to mapping memory for huge pages
+static atomic32_t _memory_global_lock;
+//! Orphaned heaps
+static heap_t *_memory_orphan_heaps;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+//! Orphaned heaps (first class heaps)
+static heap_t *_memory_first_class_orphan_heaps;
+#endif
+#if ENABLE_STATISTICS
+//! Allocations counter
+static atomic64_t _allocation_counter;
+//! Deallocations counter
+static atomic64_t _deallocation_counter;
+//! Active heap count
+static atomic32_t _memory_active_heaps;
+//! Number of currently mapped memory pages
+static atomic32_t _mapped_pages;
+//! Peak number of concurrently mapped memory pages
+static int32_t _mapped_pages_peak;
+//! Number of mapped master spans
+static atomic32_t _master_spans;
+//! Number of unmapped dangling master spans
+static atomic32_t _unmapped_master_spans;
+//! Running counter of total number of mapped memory pages since start
+static atomic32_t _mapped_total;
+//! Running counter of total number of unmapped memory pages since start
+static atomic32_t _unmapped_total;
+//! Number of currently mapped memory pages in OS calls
+static atomic32_t _mapped_pages_os;
+//! Number of currently allocated pages in huge allocations
+static atomic32_t _huge_pages_current;
+//! Peak number of currently allocated pages in huge allocations
+static int32_t _huge_pages_peak;
+#endif
+
+////////////
+///
+/// Thread local heap and ID
+///
+//////
+
+//! Current thread heap
+#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
+    defined(__TINYC__)
+static pthread_key_t _memory_thread_heap;
+#else
+#ifdef _MSC_VER
+#define _Thread_local __declspec(thread)
+#define TLS_MODEL
+#else
+#ifndef __HAIKU__
+#define TLS_MODEL __attribute__((tls_model("initial-exec")))
+#else
+#define TLS_MODEL
+#endif
+#if !defined(__clang__) && defined(__GNUC__)
+#define _Thread_local __thread
+#endif
+#endif
+static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
+#endif
+
+static inline heap_t *get_thread_heap_raw(void) {
+#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
+  return pthread_getspecific(_memory_thread_heap);
+#else
+  return _memory_thread_heap;
+#endif
+}
+
+//! Get the current thread heap
+static inline heap_t *get_thread_heap(void) {
+  heap_t *heap = get_thread_heap_raw();
+#if ENABLE_PRELOAD
+  if (EXPECTED(heap != 0))
+    return heap;
+  rpmalloc_initialize();
+  return get_thread_heap_raw();
+#else
+  return heap;
+#endif
+}
+
+//! Fast thread ID
+static inline uintptr_t get_thread_id(void) {
+#if defined(_WIN32)
+  return (uintptr_t)((void *)NtCurrentTeb());
+#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
+  uintptr_t tid;
+#if defined(__i386__)
+  __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
+#elif defined(__x86_64__)
+#if defined(__MACH__)
+  __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
+#else
+  __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
+#endif
+#elif defined(__arm__)
+  __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
+#elif defined(__aarch64__)
+#if defined(__MACH__)
+  // tpidr_el0 likely unused, always return 0 on iOS
+  __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
+#else
+  __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
+#endif
+#else
+#error This platform needs implementation of get_thread_id()
+#endif
+  return tid;
+#else
+#error This platform needs implementation of get_thread_id()
+#endif
+}
+
+//! Set the current thread heap
+static void set_thread_heap(heap_t *heap) {
+#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
+    defined(__TINYC__)
+  pthread_setspecific(_memory_thread_heap, heap);
+#else
+  _memory_thread_heap = heap;
+#endif
+  if (heap)
+    heap->owner_thread = get_thread_id();
+}
+
+//! Set main thread ID
+extern void rpmalloc_set_main_thread(void);
+
+void rpmalloc_set_main_thread(void) {
+  _rpmalloc_main_thread_id = get_thread_id();
+}
+
+static void _rpmalloc_spin(void) {
+#if defined(_MSC_VER)
+#if defined(_M_ARM64)
+  __yield();
+#else
+  _mm_pause();
+#endif
+#elif defined(__x86_64__) || defined(__i386__)
+  __asm__ volatile("pause" ::: "memory");
+#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
+  __asm__ volatile("yield" ::: "memory");
+#elif defined(__powerpc__) || defined(__powerpc64__)
+  // No idea if ever been compiled in such archs but ... as precaution
+  __asm__ volatile("or 27,27,27");
+#elif defined(__sparc__)
+  __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
+#else
+  struct timespec ts = {0};
+  nanosleep(&ts, 0);
+#endif
+}
+
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+static void NTAPI _rpmalloc_thread_destructor(void *value) {
+#if ENABLE_OVERRIDE
+  // If this is called on main thread it means rpmalloc_finalize
+  // has not been called and shutdown is forced (through _exit) or unclean
+  if (get_thread_id() == _rpmalloc_main_thread_id)
+    return;
+#endif
+  if (value)
+    rpmalloc_thread_finalize(1);
+}
+#endif
+
+////////////
+///
+/// Low level memory map/unmap
+///
+//////
+
+static void _rpmalloc_set_name(void *address, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
+  const char *name = _memory_huge_pages ? _memory_config.huge_page_name
+                                        : _memory_config.page_name;
+  if (address == MAP_FAILED || !name)
+    return;
+  // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
+  // (e.g. invalid name) it is a no-op basically.
+  (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size,
+              (uintptr_t)name);
+#else
+  (void)sizeof(size);
+  (void)sizeof(address);
+#endif
+}
+
+//! Map more virtual memory
+//  size is number of bytes to map
+//  offset receives the offset in bytes from start of mapped region
+//  returns address to start of mapped region to use
+static void *_rpmalloc_mmap(size_t size, size_t *offset) {
+  rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
+  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
+  void *address = _memory_config.memory_map(size, offset);
+  if (EXPECTED(address != 0)) {
+    _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
+                            _mapped_pages_peak);
+    _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
+  }
+  return address;
+}
+
+//! Unmap virtual memory
+//  address is the memory address to unmap, as returned from _memory_map
+//  size is the number of bytes to unmap, which might be less than full region
+//  for a partial unmap offset is the offset in bytes to the actual mapped
+//  region, as set by _memory_map release is set to 0 for partial unmap, or size
+//  of entire range for a full unmap
+static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
+                            size_t release) {
+  rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
+  rpmalloc_assert(!release || (release >= _memory_page_size),
+                  "Invalid unmap size");
+  if (release) {
+    rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
+    _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
+    _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
+  }
+  _memory_config.memory_unmap(address, size, offset, release);
+}
+
+//! Default implementation to map new pages to virtual memory
+static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
+  // Either size is a heap (a single page) or a (multiple) span - we only need
+  // to align spans, and only if larger than map granularity
+  size_t padding = ((size >= _memory_span_size) &&
+                    (_memory_span_size > _memory_map_granularity))
+                       ? _memory_span_size
+                       : 0;
+  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
+#if PLATFORM_WINDOWS
+  // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not
+  // allocated unless/until the virtual addresses are actually accessed"
+  void *ptr = VirtualAlloc(0, size + padding,
+                           (_memory_huge_pages ? MEM_LARGE_PAGES : 0) |
+                               MEM_RESERVE | MEM_COMMIT,
+                           PAGE_READWRITE);
+  if (!ptr) {
+    if (_memory_config.map_fail_callback) {
+      if (_memory_config.map_fail_callback(size + padding))
+        return _rpmalloc_mmap_os(size, offset);
+    } else {
+      rpmalloc_assert(ptr, "Failed to map virtual memory block");
+    }
+    return 0;
+  }
+#else
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
+#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+  int fd = (int)VM_MAKE_TAG(240U);
+  if (_memory_huge_pages)
+    fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
+  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
+#elif defined(MAP_HUGETLB)
+  void *ptr = mmap(0, size + padding,
+                   PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
+                   (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
+#if defined(MADV_HUGEPAGE)
+  // In some configurations, huge pages allocations might fail thus
+  // we fallback to normal allocations and promote the region as transparent
+  // huge page
+  if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) {
+    ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
+    if (ptr && ptr != MAP_FAILED) {
+      int prm = madvise(ptr, size + padding, MADV_HUGEPAGE);
+      (void)prm;
+      rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
+    }
+  }
+#endif
+  _rpmalloc_set_name(ptr, size + padding);
+#elif defined(MAP_ALIGNED)
+  const size_t align =
+      (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
+  void *ptr =
+      mmap(0, size + padding, PROT_READ | PROT_WRITE,
+           (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
+#elif defined(MAP_ALIGN)
+  caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
+  void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE,
+                   (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
+#else
+  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
+#endif
+  if ((ptr == MAP_FAILED) || !ptr) {
+    if (_memory_config.map_fail_callback) {
+      if (_memory_config.map_fail_callback(size + padding))
+        return _rpmalloc_mmap_os(size, offset);
+    } else if (errno != ENOMEM) {
+      rpmalloc_assert((ptr != MAP_FAILED) && ptr,
+                      "Failed to map virtual memory block");
+    }
+    return 0;
+  }
+#endif
+  _rpmalloc_stat_add(&_mapped_pages_os,
+                     (int32_t)((size + padding) >> _memory_page_size_shift));
+  if (padding) {
+    size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
+    rpmalloc_assert(final_padding <= _memory_span_size,
+                    "Internal failure in padding");
+    rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
+    rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
+    ptr = pointer_offset(ptr, final_padding);
+    *offset = final_padding >> 3;
+  }
+  rpmalloc_assert((size < _memory_span_size) ||
+                      !((uintptr_t)ptr & ~_memory_span_mask),
+                  "Internal failure in padding");
+  return ptr;
+}
+
+//! Default implementation to unmap pages from virtual memory
+static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
+                               size_t release) {
+  rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
+  rpmalloc_assert(!release || (release >= _memory_page_size),
+                  "Invalid unmap size");
+  rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
+  if (release && offset) {
+    offset <<= 3;
+    address = pointer_offset(address, -(int32_t)offset);
+    if ((release >= _memory_span_size) &&
+        (_memory_span_size > _memory_map_granularity)) {
+      // Padding is always one span size
+      release += _memory_span_size;
+    }
+  }
+#if !DISABLE_UNMAP
+#if PLATFORM_WINDOWS
+  if (!VirtualFree(address, release ? 0 : size,
+                   release ? MEM_RELEASE : MEM_DECOMMIT)) {
+    rpmalloc_assert(0, "Failed to unmap virtual memory block");
+  }
+#else
+  if (release) {
+    if (munmap(address, release)) {
+      rpmalloc_assert(0, "Failed to unmap virtual memory block");
+    }
+  } else {
+#if defined(MADV_FREE_REUSABLE)
+    int ret;
+    while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 &&
+           (errno == EAGAIN))
+      errno = 0;
+    if ((ret == -1) && (errno != 0)) {
+#elif defined(MADV_DONTNEED)
+    if (madvise(address, size, MADV_DONTNEED)) {
+#elif defined(MADV_PAGEOUT)
+    if (madvise(address, size, MADV_PAGEOUT)) {
+#elif defined(MADV_FREE)
+    if (madvise(address, size, MADV_FREE)) {
+#else
+    if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
+#endif
+      rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
+    }
+  }
+#endif
+#endif
+  if (release)
+    _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
+}
+
+static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
+                                                         span_t *subspan,
+                                                         size_t span_count);
+
+//! Use global reserved spans to fulfill a memory map request (reserve size must
+//! be checked by caller)
+static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) {
+  span_t *span = _memory_global_reserve;
+  _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master,
+                                               span, span_count);
+  _memory_global_reserve_count -= span_count;
+  if (_memory_global_reserve_count)
+    _memory_global_reserve =
+        (span_t *)pointer_offset(span, span_count << _memory_span_size_shift);
+  else
+    _memory_global_reserve = 0;
+  return span;
+}
+
+//! Store the given spans as global reserve (must only be called from within new
+//! heap allocation, not thread safe)
+static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve,
+                                                size_t reserve_span_count) {
+  _memory_global_reserve_master = master;
+  _memory_global_reserve_count = reserve_span_count;
+  _memory_global_reserve = reserve;
+}
+
+////////////
+///
+/// Span linked list management
+///
+//////
+
+//! Add a span to double linked list at the head
+static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
+  if (*head)
+    (*head)->prev = span;
+  span->next = *head;
+  *head = span;
+}
+
+//! Pop head span from double linked list
+static void _rpmalloc_span_double_link_list_pop_head(span_t **head,
+                                                     span_t *span) {
+  rpmalloc_assert(*head == span, "Linked list corrupted");
+  span = *head;
+  *head = span->next;
+}
+
+//! Remove a span from double linked list
+static void _rpmalloc_span_double_link_list_remove(span_t **head,
+                                                   span_t *span) {
+  rpmalloc_assert(*head, "Linked list corrupted");
+  if (*head == span) {
+    *head = span->next;
+  } else {
+    span_t *next_span = span->next;
+    span_t *prev_span = span->prev;
+    prev_span->next = next_span;
+    if (EXPECTED(next_span != 0))
+      next_span->prev = prev_span;
+  }
+}
+
+////////////
+///
+/// Span control
+///
+//////
+
+static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
+
+static void _rpmalloc_heap_finalize(heap_t *heap);
+
+static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
+                                              span_t *reserve,
+                                              size_t reserve_span_count);
+
+//! Declare the span to be a subspan and store distance from master span and
+//! span count
+static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
+                                                         span_t *subspan,
+                                                         size_t span_count) {
+  rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
+                  "Span master pointer and/or flag mismatch");
+  if (subspan != master) {
+    subspan->flags = SPAN_FLAG_SUBSPAN;
+    subspan->offset_from_master =
+        (uint32_t)((uintptr_t)pointer_diff(subspan, master) >>
+                   _memory_span_size_shift);
+    subspan->align_offset = 0;
+  }
+  subspan->span_count = (uint32_t)span_count;
+}
+
+//! Use reserved spans to fulfill a memory map request (reserve size must be
+//! checked by caller)
+static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap,
+                                               size_t span_count) {
+  // Update the heap span reserve
+  span_t *span = heap->span_reserve;
+  heap->span_reserve =
+      (span_t *)pointer_offset(span, span_count * _memory_span_size);
+  heap->spans_reserved -= (uint32_t)span_count;
+
+  _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span,
+                                               span_count);
+  if (span_count <= LARGE_CLASS_COUNT)
+    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
+
+  return span;
+}
+
+//! Get the aligned number of spans to map in based on wanted count, configured
+//! mapping granularity and the page size
+static size_t _rpmalloc_span_align_count(size_t span_count) {
+  size_t request_count = (span_count > _memory_span_map_count)
+                             ? span_count
+                             : _memory_span_map_count;
+  if ((_memory_page_size > _memory_span_size) &&
+      ((request_count * _memory_span_size) % _memory_page_size))
+    request_count +=
+        _memory_span_map_count - (request_count % _memory_span_map_count);
+  return request_count;
+}
+
+//! Setup a newly mapped span
+static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
+                                      size_t span_count, size_t align_offset) {
+  span->total_spans = (uint32_t)total_span_count;
+  span->span_count = (uint32_t)span_count;
+  span->align_offset = (uint32_t)align_offset;
+  span->flags = SPAN_FLAG_MASTER;
+  atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
+}
+
+static void _rpmalloc_span_unmap(span_t *span);
+
+//! Map an aligned set of spans, taking configured mapping granularity and the
+//! page size into account
+static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap,
+                                                size_t span_count) {
+  // If we already have some, but not enough, reserved spans, release those to
+  // heap cache and map a new full set of spans. Otherwise we would waste memory
+  // if page size > span size (huge pages)
+  size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
+  size_t align_offset = 0;
+  span_t *span = (span_t *)_rpmalloc_mmap(
+      aligned_span_count * _memory_span_size, &align_offset);
+  if (!span)
+    return 0;
+  _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
+  _rpmalloc_stat_inc(&_master_spans);
+  if (span_count <= LARGE_CLASS_COUNT)
+    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
+  if (aligned_span_count > span_count) {
+    span_t *reserved_spans =
+        (span_t *)pointer_offset(span, span_count * _memory_span_size);
+    size_t reserved_count = aligned_span_count - span_count;
+    if (heap->spans_reserved) {
+      _rpmalloc_span_mark_as_subspan_unless_master(
+          heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
+      _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
+    }
+    if (reserved_count > _memory_heap_reserve_count) {
+      // If huge pages or eager spam map count, the global reserve spin lock is
+      // held by caller, _rpmalloc_span_map
+      rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
+                      "Global spin lock not held as expected");
+      size_t remain_count = reserved_count - _memory_heap_reserve_count;
+      reserved_count = _memory_heap_reserve_count;
+      span_t *remain_span = (span_t *)pointer_offset(
+          reserved_spans, reserved_count * _memory_span_size);
+      if (_memory_global_reserve) {
+        _rpmalloc_span_mark_as_subspan_unless_master(
+            _memory_global_reserve_master, _memory_global_reserve,
+            _memory_global_reserve_count);
+        _rpmalloc_span_unmap(_memory_global_reserve);
+      }
+      _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
+    }
+    _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans,
+                                      reserved_count);
+  }
+  return span;
+}
+
+//! Map in memory pages for the given number of spans (or use previously
+//! reserved pages)
+static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
+  if (span_count <= heap->spans_reserved)
+    return _rpmalloc_span_map_from_reserve(heap, span_count);
+  span_t *span = 0;
+  int use_global_reserve =
+      (_memory_page_size > _memory_span_size) ||
+      (_memory_span_map_count > _memory_heap_reserve_count);
+  if (use_global_reserve) {
+    // If huge pages, make sure only one thread maps more memory to avoid bloat
+    while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
+      _rpmalloc_spin();
+    if (_memory_global_reserve_count >= span_count) {
+      size_t reserve_count =
+          (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
+      if (_memory_global_reserve_count < reserve_count)
+        reserve_count = _memory_global_reserve_count;
+      span = _rpmalloc_global_get_reserved_spans(reserve_count);
+      if (span) {
+        if (reserve_count > span_count) {
+          span_t *reserved_span = (span_t *)pointer_offset(
+              span, span_count << _memory_span_size_shift);
+          _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master,
+                                            reserved_span,
+                                            reserve_count - span_count);
+        }
+        // Already marked as subspan in _rpmalloc_global_get_reserved_spans
+        span->span_count = (uint32_t)span_count;
+      }
+    }
+  }
+  if (!span)
+    span = _rpmalloc_span_map_aligned_count(heap, span_count);
+  if (use_global_reserve)
+    atomic_store32_release(&_memory_global_lock, 0);
+  return span;
+}
+
+//! Unmap memory pages for the given number of spans (or mark as unused if no
+//! partial unmappings)
+static void _rpmalloc_span_unmap(span_t *span) {
+  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
+                      (span->flags & SPAN_FLAG_SUBSPAN),
+                  "Span flag corrupted");
+  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
+                      !(span->flags & SPAN_FLAG_SUBSPAN),
+                  "Span flag corrupted");
+
+  int is_master = !!(span->flags & SPAN_FLAG_MASTER);
+  span_t *master =
+      is_master ? span
+                : ((span_t *)pointer_offset(
+                      span, -(intptr_t)((uintptr_t)span->offset_from_master *
+                                        _memory_span_size)));
+  rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
+                  "Span flag corrupted");
+  rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
+
+  size_t span_count = span->span_count;
+  if (!is_master) {
+    // Directly unmap subspans (unless huge pages, in which case we defer and
+    // unmap entire page range with master)
+    rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
+    if (_memory_span_size >= _memory_page_size)
+      _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
+  } else {
+    // Special double flag to denote an unmapped master
+    // It must be kept in memory since span header must be used
+    span->flags |=
+        SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER;
+    _rpmalloc_stat_add(&_unmapped_master_spans, 1);
+  }
+
+  if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
+    // Everything unmapped, unmap the master span with release flag to unmap the
+    // entire range of the super span
+    rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
+                        !!(master->flags & SPAN_FLAG_SUBSPAN),
+                    "Span flag corrupted");
+    size_t unmap_count = master->span_count;
+    if (_memory_span_size < _memory_page_size)
+      unmap_count = master->total_spans;
+    _rpmalloc_stat_sub(&_master_spans, 1);
+    _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
+    _rpmalloc_unmap(master, unmap_count * _memory_span_size,
+                    master->align_offset,
+                    (size_t)master->total_spans * _memory_span_size);
+  }
+}
+
+//! Move the span (used for small or medium allocations) to the heap thread
+//! cache
+static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
+  rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
+  rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT,
+                  "Invalid span size class");
+  rpmalloc_assert(span->span_count == 1, "Invalid span count");
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+  atomic_decr32(&heap->span_use[0].current);
+#endif
+  _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
+  if (!heap->finalize) {
+    _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
+    _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
+    if (heap->size_class[span->size_class].cache)
+      _rpmalloc_heap_cache_insert(heap,
+                                  heap->size_class[span->size_class].cache);
+    heap->size_class[span->size_class].cache = span;
+  } else {
+    _rpmalloc_span_unmap(span);
+  }
+}
+
+//! Initialize a (partial) free list up to next system memory page, while
+//! reserving the first block as allocated, returning number of blocks in list
+static uint32_t free_list_partial_init(void **list, void **first_block,
+                                       void *page_start, void *block_start,
+                                       uint32_t block_count,
+                                       uint32_t block_size) {
+  rpmalloc_assert(block_count, "Internal failure");
+  *first_block = block_start;
+  if (block_count > 1) {
+    void *free_block = pointer_offset(block_start, block_size);
+    void *block_end =
+        pointer_offset(block_start, (size_t)block_size * block_count);
+    // If block size is less than half a memory page, bound init to next memory
+    // page boundary
+    if (block_size < (_memory_page_size >> 1)) {
+      void *page_end = pointer_offset(page_start, _memory_page_size);
+      if (page_end < block_end)
+        block_end = page_end;
+    }
+    *list = free_block;
+    block_count = 2;
+    void *next_block = pointer_offset(free_block, block_size);
+    while (next_block < block_end) {
+      *((void **)free_block) = next_block;
+      free_block = next_block;
+      ++block_count;
+      next_block = pointer_offset(next_block, block_size);
+    }
+    *((void **)free_block) = 0;
+  } else {
+    *list = 0;
+  }
+  return block_count;
+}
+
+//! Initialize an unused span (from cache or mapped) to be new active span,
+//! putting the initial free list in heap class free list
+static void *_rpmalloc_span_initialize_new(heap_t *heap,
+                                           heap_size_class_t *heap_size_class,
+                                           span_t *span, uint32_t class_idx) {
+  rpmalloc_assert(span->span_count == 1, "Internal failure");
+  size_class_t *size_class = _memory_size_class + class_idx;
+  span->size_class = class_idx;
+  span->heap = heap;
+  span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
+  span->block_size = size_class->block_size;
+  span->block_count = size_class->block_count;
+  span->free_list = 0;
+  span->list_size = 0;
+  atomic_store_ptr_release(&span->free_list_deferred, 0);
+
+  // Setup free list. Only initialize one system page worth of free blocks in
+  // list
+  void *block;
+  span->free_list_limit =
+      free_list_partial_init(&heap_size_class->free_list, &block, span,
+                             pointer_offset(span, SPAN_HEADER_SIZE),
+                             size_class->block_count, size_class->block_size);
+  // Link span as partial if there remains blocks to be initialized as free
+  // list, or full if fully initialized
+  if (span->free_list_limit < span->block_count) {
+    _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
+    span->used_count = span->free_list_limit;
+  } else {
+#if RPMALLOC_FIRST_CLASS_HEAPS
+    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
+#endif
+    ++heap->full_span_count;
+    span->used_count = span->block_count;
+  }
+  return block;
+}
+
+static void _rpmalloc_span_extract_free_list_deferred(span_t *span) {
+  // We need acquire semantics on the CAS operation since we are interested in
+  // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for
+  // further comments on this dependency
+  do {
+    span->free_list =
+        atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
+  } while (span->free_list == INVALID_POINTER);
+  span->used_count -= span->list_size;
+  span->list_size = 0;
+  atomic_store_ptr_release(&span->free_list_deferred, 0);
+}
+
+static int _rpmalloc_span_is_fully_utilized(span_t *span) {
+  rpmalloc_assert(span->free_list_limit <= span->block_count,
+                  "Span free list corrupted");
+  return !span->free_list && (span->free_list_limit >= span->block_count);
+}
+
+static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
+                                   span_t **list_head) {
+  void *free_list = heap->size_class[iclass].free_list;
+  span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask);
+  if (span == class_span) {
+    // Adopt the heap class free list back into the span free list
+    void *block = span->free_list;
+    void *last_block = 0;
+    while (block) {
+      last_block = block;
+      block = *((void **)block);
+    }
+    uint32_t free_count = 0;
+    block = free_list;
+    while (block) {
+      ++free_count;
+      block = *((void **)block);
+    }
+    if (last_block) {
+      *((void **)last_block) = free_list;
+    } else {
+      span->free_list = free_list;
+    }
+    heap->size_class[iclass].free_list = 0;
+    span->used_count -= free_count;
+  }
+  // If this assert triggers you have memory leaks
+  rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
+  if (span->list_size == span->used_count) {
+    _rpmalloc_stat_dec(&heap->span_use[0].current);
+    _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
+    // This function only used for spans in double linked lists
+    if (list_head)
+      _rpmalloc_span_double_link_list_remove(list_head, span);
+    _rpmalloc_span_unmap(span);
+    return 1;
+  }
+  return 0;
+}
+
+////////////
+///
+/// Global cache
+///
+//////
+
+#if ENABLE_GLOBAL_CACHE
+
+//! Finalize a global cache
+static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
+  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+    _rpmalloc_spin();
+
+  for (size_t ispan = 0; ispan < cache->count; ++ispan)
+    _rpmalloc_span_unmap(cache->span[ispan]);
+  cache->count = 0;
+
+  while (cache->overflow) {
+    span_t *span = cache->overflow;
+    cache->overflow = span->next;
+    _rpmalloc_span_unmap(span);
+  }
+
+  atomic_store32_release(&cache->lock, 0);
+}
+
+static void _rpmalloc_global_cache_insert_spans(span_t **span,
+                                                size_t span_count,
+                                                size_t count) {
+  const size_t cache_limit =
+      (span_count == 1) ? GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE
+                        : GLOBAL_CACHE_MULTIPLIER *
+                              (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
+
+  global_cache_t *cache = &_memory_span_cache[span_count - 1];
+
+  size_t insert_count = count;
+  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+    _rpmalloc_spin();
+
+#if ENABLE_STATISTICS
+  cache->insert_count += count;
+#endif
+  if ((cache->count + insert_count) > cache_limit)
+    insert_count = cache_limit - cache->count;
+
+  memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count);
+  cache->count += (uint32_t)insert_count;
+
+#if ENABLE_UNLIMITED_CACHE
+  while (insert_count < count) {
+#else
+  // Enable unlimited cache if huge pages, or we will leak since it is unlikely
+  // that an entire huge page will be unmapped, and we're unable to partially
+  // decommit a huge page
+  while ((_memory_page_size > _memory_span_size) && (insert_count < count)) {
+#endif
+    span_t *current_span = span[insert_count++];
+    current_span->next = cache->overflow;
+    cache->overflow = current_span;
+  }
+  atomic_store32_release(&cache->lock, 0);
+
+  span_t *keep = 0;
+  for (size_t ispan = insert_count; ispan < count; ++ispan) {
+    span_t *current_span = span[ispan];
+    // Keep master spans that has remaining subspans to avoid dangling them
+    if ((current_span->flags & SPAN_FLAG_MASTER) &&
+        (atomic_load32(&current_span->remaining_spans) >
+         (int32_t)current_span->span_count)) {
+      current_span->next = keep;
+      keep = current_span;
+    } else {
+      _rpmalloc_span_unmap(current_span);
+    }
+  }
+
+  if (keep) {
+    while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+      _rpmalloc_spin();
+
+    size_t islot = 0;
+    while (keep) {
+      for (; islot < cache->count; ++islot) {
+        span_t *current_span = cache->span[islot];
+        if (!(current_span->flags & SPAN_FLAG_MASTER) ||
+            ((current_span->flags & SPAN_FLAG_MASTER) &&
+             (atomic_load32(&current_span->remaining_spans) <=
+              (int32_t)current_span->span_count))) {
+          _rpmalloc_span_unmap(current_span);
+          cache->span[islot] = keep;
+          break;
+        }
+      }
+      if (islot == cache->count)
+        break;
+      keep = keep->next;
+    }
+
+    if (keep) {
+      span_t *tail = keep;
+      while (tail->next)
+        tail = tail->next;
+      tail->next = cache->overflow;
+      cache->overflow = keep;
+    }
+
+    atomic_store32_release(&cache->lock, 0);
+  }
+}
+
+static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
+                                                   size_t span_count,
+                                                   size_t count) {
+  global_cache_t *cache = &_memory_span_cache[span_count - 1];
+
+  size_t extract_count = 0;
+  while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+    _rpmalloc_spin();
+
+#if ENABLE_STATISTICS
+  cache->extract_count += count;
+#endif
+  size_t want = count - extract_count;
+  if (want > cache->count)
+    want = cache->count;
+
+  memcpy(span + extract_count, cache->span + (cache->count - want),
+         sizeof(span_t *) * want);
+  cache->count -= (uint32_t)want;
+  extract_count += want;
+
+  while ((extract_count < count) && cache->overflow) {
+    span_t *current_span = cache->overflow;
+    span[extract_count++] = current_span;
+    cache->overflow = current_span->next;
+  }
+
+#if ENABLE_ASSERTS
+  for (size_t ispan = 0; ispan < extract_count; ++ispan) {
+    rpmalloc_assert(span[ispan]->span_count == span_count,
+                    "Global cache span count mismatch");
+  }
+#endif
+
+  atomic_store32_release(&cache->lock, 0);
+
+  return extract_count;
+}
+
+#endif
+
+////////////
+///
+/// Heap control
+///
+//////
+
+static void _rpmalloc_deallocate_huge(span_t *);
+
+//! Store the given spans as reserve in the given heap
+static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
+                                              span_t *reserve,
+                                              size_t reserve_span_count) {
+  heap->span_reserve_master = master;
+  heap->span_reserve = reserve;
+  heap->spans_reserved = (uint32_t)reserve_span_count;
+}
+
+//! Adopt the deferred span cache list, optionally extracting the first single
+//! span for immediate re-use
+static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap,
+                                                span_t **single_span) {
+  span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire(
+      &heap->span_free_deferred, 0));
+  while (span) {
+    span_t *next_span = (span_t *)span->free_list;
+    rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
+    if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
+      rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
+      --heap->full_span_count;
+      _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
+#if RPMALLOC_FIRST_CLASS_HEAPS
+      _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
+                                             span);
+#endif
+      _rpmalloc_stat_dec(&heap->span_use[0].current);
+      _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
+      if (single_span && !*single_span)
+        *single_span = span;
+      else
+        _rpmalloc_heap_cache_insert(heap, span);
+    } else {
+      if (span->size_class == SIZE_CLASS_HUGE) {
+        _rpmalloc_deallocate_huge(span);
+      } else {
+        rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE,
+                        "Span size class invalid");
+        rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
+        --heap->full_span_count;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+        _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
+#endif
+        uint32_t idx = span->span_count - 1;
+        _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
+        _rpmalloc_stat_dec(&heap->span_use[idx].current);
+        if (!idx && single_span && !*single_span)
+          *single_span = span;
+        else
+          _rpmalloc_heap_cache_insert(heap, span);
+      }
+    }
+    span = next_span;
+  }
+}
+
+static void _rpmalloc_heap_unmap(heap_t *heap) {
+  if (!heap->master_heap) {
+    if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
+      span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask);
+      _rpmalloc_span_unmap(span);
+    }
+  } else {
+    if (atomic_decr32(&heap->master_heap->child_count) == 0) {
+      _rpmalloc_heap_unmap(heap->master_heap);
+    }
+  }
+}
+
+static void _rpmalloc_heap_global_finalize(heap_t *heap) {
+  if (heap->finalize++ > 1) {
+    --heap->finalize;
+    return;
+  }
+
+  _rpmalloc_heap_finalize(heap);
+
+#if ENABLE_THREAD_CACHE
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    span_cache_t *span_cache;
+    if (!iclass)
+      span_cache = &heap->span_cache;
+    else
+      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
+    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+      _rpmalloc_span_unmap(span_cache->span[ispan]);
+    span_cache->count = 0;
+  }
+#endif
+
+  if (heap->full_span_count) {
+    --heap->finalize;
+    return;
+  }
+
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    if (heap->size_class[iclass].free_list ||
+        heap->size_class[iclass].partial_span) {
+      --heap->finalize;
+      return;
+    }
+  }
+  // Heap is now completely free, unmap and remove from heap list
+  size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
+  heap_t *list_heap = _memory_heaps[list_idx];
+  if (list_heap == heap) {
+    _memory_heaps[list_idx] = heap->next_heap;
+  } else {
+    while (list_heap->next_heap != heap)
+      list_heap = list_heap->next_heap;
+    list_heap->next_heap = heap->next_heap;
+  }
+
+  _rpmalloc_heap_unmap(heap);
+}
+
+//! Insert a single span into thread heap cache, releasing to global cache if
+//! overflow
+static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
+  if (UNEXPECTED(heap->finalize != 0)) {
+    _rpmalloc_span_unmap(span);
+    _rpmalloc_heap_global_finalize(heap);
+    return;
+  }
+#if ENABLE_THREAD_CACHE
+  size_t span_count = span->span_count;
+  _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
+  if (span_count == 1) {
+    span_cache_t *span_cache = &heap->span_cache;
+    span_cache->span[span_cache->count++] = span;
+    if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
+      const size_t remain_count =
+          MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
+#if ENABLE_GLOBAL_CACHE
+      _rpmalloc_stat_add64(&heap->thread_to_global,
+                           THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
+      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
+                         THREAD_SPAN_CACHE_TRANSFER);
+      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
+                                          span_count,
+                                          THREAD_SPAN_CACHE_TRANSFER);
+#else
+      for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
+        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
+#endif
+      span_cache->count = remain_count;
+    }
+  } else {
+    size_t cache_idx = span_count - 2;
+    span_large_cache_t *span_cache = heap->span_large_cache + cache_idx;
+    span_cache->span[span_cache->count++] = span;
+    const size_t cache_limit =
+        (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
+    if (span_cache->count == cache_limit) {
+      const size_t transfer_limit = 2 + (cache_limit >> 2);
+      const size_t transfer_count =
+          (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit
+               ? THREAD_SPAN_LARGE_CACHE_TRANSFER
+               : transfer_limit);
+      const size_t remain_count = cache_limit - transfer_count;
+#if ENABLE_GLOBAL_CACHE
+      _rpmalloc_stat_add64(&heap->thread_to_global,
+                           transfer_count * span_count * _memory_span_size);
+      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
+                         transfer_count);
+      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
+                                          span_count, transfer_count);
+#else
+      for (size_t ispan = 0; ispan < transfer_count; ++ispan)
+        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
+#endif
+      span_cache->count = remain_count;
+    }
+  }
+#else
+  (void)sizeof(heap);
+  _rpmalloc_span_unmap(span);
+#endif
+}
+
+//! Extract the given number of spans from the different cache levels
+static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap,
+                                                   size_t span_count) {
+  span_t *span = 0;
+#if ENABLE_THREAD_CACHE
+  span_cache_t *span_cache;
+  if (span_count == 1)
+    span_cache = &heap->span_cache;
+  else
+    span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
+  if (span_cache->count) {
+    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
+    return span_cache->span[--span_cache->count];
+  }
+#endif
+  return span;
+}
+
+static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap,
+                                                            size_t span_count) {
+  span_t *span = 0;
+  if (span_count == 1) {
+    _rpmalloc_heap_cache_adopt_deferred(heap, &span);
+  } else {
+    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
+  }
+  return span;
+}
+
+static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap,
+                                               size_t span_count) {
+  if (heap->spans_reserved >= span_count)
+    return _rpmalloc_span_map(heap, span_count);
+  return 0;
+}
+
+//! Extract a span from the global cache
+static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap,
+                                                   size_t span_count) {
+#if ENABLE_GLOBAL_CACHE
+#if ENABLE_THREAD_CACHE
+  span_cache_t *span_cache;
+  size_t wanted_count;
+  if (span_count == 1) {
+    span_cache = &heap->span_cache;
+    wanted_count = THREAD_SPAN_CACHE_TRANSFER;
+  } else {
+    span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
+    wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
+  }
+  span_cache->count = _rpmalloc_global_cache_extract_spans(
+      span_cache->span, span_count, wanted_count);
+  if (span_cache->count) {
+    _rpmalloc_stat_add64(&heap->global_to_thread,
+                         span_count * span_cache->count * _memory_span_size);
+    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
+                       span_cache->count);
+    return span_cache->span[--span_cache->count];
+  }
+#else
+  span_t *span = 0;
+  size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
+  if (count) {
+    _rpmalloc_stat_add64(&heap->global_to_thread,
+                         span_count * count * _memory_span_size);
+    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
+                       count);
+    return span;
+  }
+#endif
+#endif
+  (void)sizeof(heap);
+  (void)sizeof(span_count);
+  return 0;
+}
+
+static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
+                                          uint32_t class_idx) {
+  (void)sizeof(heap);
+  (void)sizeof(span_count);
+  (void)sizeof(class_idx);
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+  uint32_t idx = (uint32_t)span_count - 1;
+  uint32_t current_count =
+      (uint32_t)atomic_incr32(&heap->span_use[idx].current);
+  if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
+    atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
+  _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
+                          heap->size_class_use[class_idx].spans_peak);
+#endif
+}
+
+//! Get a span from one of the cache levels (thread cache, reserved, global
+//! cache) or fallback to mapping more memory
+static span_t *
+_rpmalloc_heap_extract_new_span(heap_t *heap,
+                                heap_size_class_t *heap_size_class,
+                                size_t span_count, uint32_t class_idx) {
+  span_t *span;
+#if ENABLE_THREAD_CACHE
+  if (heap_size_class && heap_size_class->cache) {
+    span = heap_size_class->cache;
+    heap_size_class->cache =
+        (heap->span_cache.count
+             ? heap->span_cache.span[--heap->span_cache.count]
+             : 0);
+    _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+    return span;
+  }
+#endif
+  (void)sizeof(class_idx);
+  // Allow 50% overhead to increase cache hits
+  size_t base_span_count = span_count;
+  size_t limit_span_count =
+      (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
+  if (limit_span_count > LARGE_CLASS_COUNT)
+    limit_span_count = LARGE_CLASS_COUNT;
+  do {
+    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
+    if (EXPECTED(span != 0)) {
+      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+      return span;
+    }
+    span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
+    if (EXPECTED(span != 0)) {
+      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+      return span;
+    }
+    span = _rpmalloc_heap_global_cache_extract(heap, span_count);
+    if (EXPECTED(span != 0)) {
+      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+      return span;
+    }
+    span = _rpmalloc_heap_reserved_extract(heap, span_count);
+    if (EXPECTED(span != 0)) {
+      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
+      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+      return span;
+    }
+    ++span_count;
+  } while (span_count <= limit_span_count);
+  // Final fallback, map in more virtual memory
+  span = _rpmalloc_span_map(heap, base_span_count);
+  _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx);
+  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
+  return span;
+}
+
+static void _rpmalloc_heap_initialize(heap_t *heap) {
+  _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
+  // Get a new heap ID
+  heap->id = 1 + atomic_incr32(&_memory_heap_id);
+
+  // Link in heap in heap ID map
+  size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
+  heap->next_heap = _memory_heaps[list_idx];
+  _memory_heaps[list_idx] = heap;
+}
+
+static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
+  heap->owner_thread = (uintptr_t)-1;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  heap_t **heap_list =
+      (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
+#else
+  (void)sizeof(first_class);
+  heap_t **heap_list = &_memory_orphan_heaps;
+#endif
+  heap->next_orphan = *heap_list;
+  *heap_list = heap;
+}
+
+//! Allocate a new heap from newly mapped memory pages
+static heap_t *_rpmalloc_heap_allocate_new(void) {
+  // Map in pages for a 16 heaps. If page size is greater than required size for
+  // this, map a page and use first part for heaps and remaining part for spans
+  // for allocations. Adds a lot of complexity, but saves a lot of memory on
+  // systems where page size > 64 spans (4MiB)
+  size_t heap_size = sizeof(heap_t);
+  size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
+  size_t request_heap_count = 16;
+  size_t heap_span_count = ((aligned_heap_size * request_heap_count) +
+                            sizeof(span_t) + _memory_span_size - 1) /
+                           _memory_span_size;
+  size_t block_size = _memory_span_size * heap_span_count;
+  size_t span_count = heap_span_count;
+  span_t *span = 0;
+  // If there are global reserved spans, use these first
+  if (_memory_global_reserve_count >= heap_span_count) {
+    span = _rpmalloc_global_get_reserved_spans(heap_span_count);
+  }
+  if (!span) {
+    if (_memory_page_size > block_size) {
+      span_count = _memory_page_size / _memory_span_size;
+      block_size = _memory_page_size;
+      // If using huge pages, make sure to grab enough heaps to avoid
+      // reallocating a huge page just to serve new heaps
+      size_t possible_heap_count =
+          (block_size - sizeof(span_t)) / aligned_heap_size;
+      if (possible_heap_count >= (request_heap_count * 16))
+        request_heap_count *= 16;
+      else if (possible_heap_count < request_heap_count)
+        request_heap_count = possible_heap_count;
+      heap_span_count = ((aligned_heap_size * request_heap_count) +
+                         sizeof(span_t) + _memory_span_size - 1) /
+                        _memory_span_size;
+    }
+
+    size_t align_offset = 0;
+    span = (span_t *)_rpmalloc_mmap(block_size, &align_offset);
+    if (!span)
+      return 0;
+
+    // Master span will contain the heaps
+    _rpmalloc_stat_inc(&_master_spans);
+    _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
+  }
+
+  size_t remain_size = _memory_span_size - sizeof(span_t);
+  heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t));
+  _rpmalloc_heap_initialize(heap);
+
+  // Put extra heaps as orphans
+  size_t num_heaps = remain_size / aligned_heap_size;
+  if (num_heaps < request_heap_count)
+    num_heaps = request_heap_count;
+  atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
+  heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
+  while (num_heaps > 1) {
+    _rpmalloc_heap_initialize(extra_heap);
+    extra_heap->master_heap = heap;
+    _rpmalloc_heap_orphan(extra_heap, 1);
+    extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
+    --num_heaps;
+  }
+
+  if (span_count > heap_span_count) {
+    // Cap reserved spans
+    size_t remain_count = span_count - heap_span_count;
+    size_t reserve_count =
+        (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count
+                                                   : remain_count);
+    span_t *remain_span =
+        (span_t *)pointer_offset(span, heap_span_count * _memory_span_size);
+    _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
+
+    if (remain_count > reserve_count) {
+      // Set to global reserved spans
+      remain_span = (span_t *)pointer_offset(remain_span,
+                                             reserve_count * _memory_span_size);
+      reserve_count = remain_count - reserve_count;
+      _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count);
+    }
+  }
+
+  return heap;
+}
+
+static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) {
+  heap_t *heap = *heap_list;
+  *heap_list = (heap ? heap->next_orphan : 0);
+  return heap;
+}
+
+//! Allocate a new heap, potentially reusing a previously orphaned heap
+static heap_t *_rpmalloc_heap_allocate(int first_class) {
+  heap_t *heap = 0;
+  while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
+    _rpmalloc_spin();
+  if (first_class == 0)
+    heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  if (!heap)
+    heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
+#endif
+  if (!heap)
+    heap = _rpmalloc_heap_allocate_new();
+  atomic_store32_release(&_memory_global_lock, 0);
+  if (heap)
+    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+  return heap;
+}
+
+static void _rpmalloc_heap_release(void *heapptr, int first_class,
+                                   int release_cache) {
+  heap_t *heap = (heap_t *)heapptr;
+  if (!heap)
+    return;
+  // Release thread cache spans back to global cache
+  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+  if (release_cache || heap->finalize) {
+#if ENABLE_THREAD_CACHE
+    for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+      span_cache_t *span_cache;
+      if (!iclass)
+        span_cache = &heap->span_cache;
+      else
+        span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
+      if (!span_cache->count)
+        continue;
+#if ENABLE_GLOBAL_CACHE
+      if (heap->finalize) {
+        for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+          _rpmalloc_span_unmap(span_cache->span[ispan]);
+      } else {
+        _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
+                                                          (iclass + 1) *
+                                                          _memory_span_size);
+        _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
+                           span_cache->count);
+        _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
+                                            span_cache->count);
+      }
+#else
+      for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+        _rpmalloc_span_unmap(span_cache->span[ispan]);
+#endif
+      span_cache->count = 0;
+    }
+#endif
+  }
+
+  if (get_thread_heap_raw() == heap)
+    set_thread_heap(0);
+
+#if ENABLE_STATISTICS
+  atomic_decr32(&_memory_active_heaps);
+  rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
+                  "Still active heaps during finalization");
+#endif
+
+  // If we are forcibly terminating with _exit the state of the
+  // lock atomic is unknown and it's best to just go ahead and exit
+  if (get_thread_id() != _rpmalloc_main_thread_id) {
+    while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
+      _rpmalloc_spin();
+  }
+  _rpmalloc_heap_orphan(heap, first_class);
+  atomic_store32_release(&_memory_global_lock, 0);
+}
+
+static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
+  _rpmalloc_heap_release(heapptr, 0, release_cache);
+}
+
+static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
+  _rpmalloc_heap_release_raw(heapptr, 1);
+}
+
+static void _rpmalloc_heap_finalize(heap_t *heap) {
+  if (heap->spans_reserved) {
+    span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
+    _rpmalloc_span_unmap(span);
+    heap->spans_reserved = 0;
+  }
+
+  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    if (heap->size_class[iclass].cache)
+      _rpmalloc_span_unmap(heap->size_class[iclass].cache);
+    heap->size_class[iclass].cache = 0;
+    span_t *span = heap->size_class[iclass].partial_span;
+    while (span) {
+      span_t *next = span->next;
+      _rpmalloc_span_finalize(heap, iclass, span,
+                              &heap->size_class[iclass].partial_span);
+      span = next;
+    }
+    // If class still has a free list it must be a full span
+    if (heap->size_class[iclass].free_list) {
+      span_t *class_span =
+          (span_t *)((uintptr_t)heap->size_class[iclass].free_list &
+                     _memory_span_mask);
+      span_t **list = 0;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+      list = &heap->full_span[iclass];
+#endif
+      --heap->full_span_count;
+      if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
+        if (list)
+          _rpmalloc_span_double_link_list_remove(list, class_span);
+        _rpmalloc_span_double_link_list_add(
+            &heap->size_class[iclass].partial_span, class_span);
+      }
+    }
+  }
+
+#if ENABLE_THREAD_CACHE
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    span_cache_t *span_cache;
+    if (!iclass)
+      span_cache = &heap->span_cache;
+    else
+      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
+    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+      _rpmalloc_span_unmap(span_cache->span[ispan]);
+    span_cache->count = 0;
+  }
+#endif
+  rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred),
+                  "Heaps still active during finalization");
+}
+
+////////////
+///
+/// Allocation entry points
+///
+//////
+
+//! Pop first block from a free list
+static void *free_list_pop(void **list) {
+  void *block = *list;
+  *list = *((void **)block);
+  return block;
+}
+
+//! Allocate a small/medium sized memory block from the given heap
+static void *_rpmalloc_allocate_from_heap_fallback(
+    heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) {
+  span_t *span = heap_size_class->partial_span;
+  rpmalloc_assume(heap != 0);
+  if (EXPECTED(span != 0)) {
+    rpmalloc_assert(span->block_count ==
+                        _memory_size_class[span->size_class].block_count,
+                    "Span block count corrupted");
+    rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span),
+                    "Internal failure");
+    void *block;
+    if (span->free_list) {
+      // Span local free list is not empty, swap to size class free list
+      block = free_list_pop(&span->free_list);
+      heap_size_class->free_list = span->free_list;
+      span->free_list = 0;
+    } else {
+      // If the span did not fully initialize free list, link up another page
+      // worth of blocks
+      void *block_start = pointer_offset(
+          span, SPAN_HEADER_SIZE +
+                    ((size_t)span->free_list_limit * span->block_size));
+      span->free_list_limit += free_list_partial_init(
+          &heap_size_class->free_list, &block,
+          (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)),
+          block_start, span->block_count - span->free_list_limit,
+          span->block_size);
+    }
+    rpmalloc_assert(span->free_list_limit <= span->block_count,
+                    "Span block count corrupted");
+    span->used_count = span->free_list_limit;
+
+    // Swap in deferred free list if present
+    if (atomic_load_ptr(&span->free_list_deferred))
+      _rpmalloc_span_extract_free_list_deferred(span);
+
+    // If span is still not fully utilized keep it in partial list and early
+    // return block
+    if (!_rpmalloc_span_is_fully_utilized(span))
+      return block;
+
+    // The span is fully utilized, unlink from partial list and add to fully
+    // utilized list
+    _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span,
+                                             span);
+#if RPMALLOC_FIRST_CLASS_HEAPS
+    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
+#endif
+    ++heap->full_span_count;
+    return block;
+  }
+
+  // Find a span in one of the cache levels
+  span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
+  if (EXPECTED(span != 0)) {
+    // Mark span as owned by this heap and set base data, return first block
+    return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
+                                         class_idx);
+  }
+
+  return 0;
+}
+
+//! Allocate a small sized memory block from the given heap
+static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
+  rpmalloc_assert(heap, "No thread heap");
+  // Small sizes have unique size classes
+  const uint32_t class_idx =
+      (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
+  heap_size_class_t *heap_size_class = heap->size_class + class_idx;
+  _rpmalloc_stat_inc_alloc(heap, class_idx);
+  if (EXPECTED(heap_size_class->free_list != 0))
+    return free_list_pop(&heap_size_class->free_list);
+  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
+                                               class_idx);
+}
+
+//! Allocate a medium sized memory block from the given heap
+static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
+  rpmalloc_assert(heap, "No thread heap");
+  // Calculate the size class index and do a dependent lookup of the final class
+  // index (in case of merged classes)
+  const uint32_t base_idx =
+      (uint32_t)(SMALL_CLASS_COUNT +
+                 ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
+  const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
+  heap_size_class_t *heap_size_class = heap->size_class + class_idx;
+  _rpmalloc_stat_inc_alloc(heap, class_idx);
+  if (EXPECTED(heap_size_class->free_list != 0))
+    return free_list_pop(&heap_size_class->free_list);
+  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
+                                               class_idx);
+}
+
+//! Allocate a large sized memory block from the given heap
+static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
+  rpmalloc_assert(heap, "No thread heap");
+  // Calculate number of needed max sized spans (including header)
+  // Since this function is never called if size > LARGE_SIZE_LIMIT
+  // the span_count is guaranteed to be <= LARGE_CLASS_COUNT
+  size += SPAN_HEADER_SIZE;
+  size_t span_count = size >> _memory_span_size_shift;
+  if (size & (_memory_span_size - 1))
+    ++span_count;
+
+  // Find a span in one of the cache levels
+  span_t *span =
+      _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE);
+  if (!span)
+    return span;
+
+  // Mark span as owned by this heap and set base data
+  rpmalloc_assert(span->span_count >= span_count, "Internal failure");
+  span->size_class = SIZE_CLASS_LARGE;
+  span->heap = heap;
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+#endif
+  ++heap->full_span_count;
+
+  return pointer_offset(span, SPAN_HEADER_SIZE);
+}
+
+//! Allocate a huge block by mapping memory pages directly
+static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
+  rpmalloc_assert(heap, "No thread heap");
+  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+  size += SPAN_HEADER_SIZE;
+  size_t num_pages = size >> _memory_page_size_shift;
+  if (size & (_memory_page_size - 1))
+    ++num_pages;
+  size_t align_offset = 0;
+  span_t *span =
+      (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
+  if (!span)
+    return span;
+
+  // Store page count in span_count
+  span->size_class = SIZE_CLASS_HUGE;
+  span->span_count = (uint32_t)num_pages;
+  span->align_offset = (uint32_t)align_offset;
+  span->heap = heap;
+  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+#endif
+  ++heap->full_span_count;
+
+  return pointer_offset(span, SPAN_HEADER_SIZE);
+}
+
+//! Allocate a block of the given size
+static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
+  _rpmalloc_stat_add64(&_allocation_counter, 1);
+  if (EXPECTED(size <= SMALL_SIZE_LIMIT))
+    return _rpmalloc_allocate_small(heap, size);
+  else if (size <= _memory_medium_size_limit)
+    return _rpmalloc_allocate_medium(heap, size);
+  else if (size <= LARGE_SIZE_LIMIT)
+    return _rpmalloc_allocate_large(heap, size);
+  return _rpmalloc_allocate_huge(heap, size);
+}
+
+static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
+                                        size_t size) {
+  if (alignment <= SMALL_GRANULARITY)
+    return _rpmalloc_allocate(heap, size);
+
+#if ENABLE_VALIDATE_ARGS
+  if ((size + alignment) < size) {
+    errno = EINVAL;
+    return 0;
+  }
+  if (alignment & (alignment - 1)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+
+  if ((alignment <= SPAN_HEADER_SIZE) &&
+      ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) {
+    // If alignment is less or equal to span header size (which is power of
+    // two), and size aligned to span header size multiples is less than size +
+    // alignment, then use natural alignment of blocks to provide alignment
+    size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) &
+                                      ~(uintptr_t)(SPAN_HEADER_SIZE - 1)
+                                : SPAN_HEADER_SIZE;
+    rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
+                    "Failed alignment calculation");
+    if (multiple_size <= (size + alignment))
+      return _rpmalloc_allocate(heap, multiple_size);
+  }
+
+  void *ptr = 0;
+  size_t align_mask = alignment - 1;
+  if (alignment <= _memory_page_size) {
+    ptr = _rpmalloc_allocate(heap, size + alignment);
+    if ((uintptr_t)ptr & align_mask) {
+      ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
+      // Mark as having aligned blocks
+      span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
+      span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
+    }
+    return ptr;
+  }
+
+  // Fallback to mapping new pages for this request. Since pointers passed
+  // to rpfree must be able to reach the start of the span by bitmasking of
+  // the address with the span size, the returned aligned pointer from this
+  // function must be with a span size of the start of the mapped area.
+  // In worst case this requires us to loop and map pages until we get a
+  // suitable memory address. It also means we can never align to span size
+  // or greater, since the span header will push alignment more than one
+  // span size away from span start (thus causing pointer mask to give us
+  // an invalid span start on free)
+  if (alignment & align_mask) {
+    errno = EINVAL;
+    return 0;
+  }
+  if (alignment >= _memory_span_size) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  size_t extra_pages = alignment / _memory_page_size;
+
+  // Since each span has a header, we will at least need one extra memory page
+  size_t num_pages = 1 + (size / _memory_page_size);
+  if (size & (_memory_page_size - 1))
+    ++num_pages;
+
+  if (extra_pages > num_pages)
+    num_pages = 1 + extra_pages;
+
+  size_t original_pages = num_pages;
+  size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
+  if (limit_pages < (original_pages * 2))
+    limit_pages = original_pages * 2;
+
+  size_t mapped_size, align_offset;
+  span_t *span;
+
+retry:
+  align_offset = 0;
+  mapped_size = num_pages * _memory_page_size;
+
+  span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
+  if (!span) {
+    errno = ENOMEM;
+    return 0;
+  }
+  ptr = pointer_offset(span, SPAN_HEADER_SIZE);
+
+  if ((uintptr_t)ptr & align_mask)
+    ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
+
+  if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
+      (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
+      (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
+    _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
+    ++num_pages;
+    if (num_pages > limit_pages) {
+      errno = EINVAL;
+      return 0;
+    }
+    goto retry;
+  }
+
+  // Store page count in span_count
+  span->size_class = SIZE_CLASS_HUGE;
+  span->span_count = (uint32_t)num_pages;
+  span->align_offset = (uint32_t)align_offset;
+  span->heap = heap;
+  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+#endif
+  ++heap->full_span_count;
+
+  _rpmalloc_stat_add64(&_allocation_counter, 1);
+
+  return ptr;
+}
+
+////////////
+///
+/// Deallocation entry points
+///
+//////
+
+//! Deallocate the given small/medium memory block in the current thread local
+//! heap
+static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span,
+                                                        void *block) {
+  heap_t *heap = span->heap;
+  rpmalloc_assert(heap->owner_thread == get_thread_id() ||
+                      !heap->owner_thread || heap->finalize,
+                  "Internal failure");
+  // Add block to free list
+  if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
+    span->used_count = span->block_count;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+    _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
+                                           span);
+#endif
+    _rpmalloc_span_double_link_list_add(
+        &heap->size_class[span->size_class].partial_span, span);
+    --heap->full_span_count;
+  }
+  *((void **)block) = span->free_list;
+  --span->used_count;
+  span->free_list = block;
+  if (UNEXPECTED(span->used_count == span->list_size)) {
+    // If there are no used blocks it is guaranteed that no other external
+    // thread is accessing the span
+    if (span->used_count) {
+      // Make sure we have synchronized the deferred list and list size by using
+      // acquire semantics and guarantee that no external thread is accessing
+      // span concurrently
+      void *free_list;
+      do {
+        free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred,
+                                                INVALID_POINTER);
+      } while (free_list == INVALID_POINTER);
+      atomic_store_ptr_release(&span->free_list_deferred, free_list);
+    }
+    _rpmalloc_span_double_link_list_remove(
+        &heap->size_class[span->size_class].partial_span, span);
+    _rpmalloc_span_release_to_cache(heap, span);
+  }
+}
+
+static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
+  if (span->size_class != SIZE_CLASS_HUGE)
+    _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
+  // This list does not need ABA protection, no mutable side state
+  do {
+    span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred);
+  } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
+}
+
+//! Put the block in the deferred free list of the owning span
+static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span,
+                                                       void *block) {
+  // The memory ordering here is a bit tricky, to avoid having to ABA protect
+  // the deferred free list to avoid desynchronization of list and list size
+  // we need to have acquire semantics on successful CAS of the pointer to
+  // guarantee the list_size variable validity + release semantics on pointer
+  // store
+  void *free_list;
+  do {
+    free_list =
+        atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
+  } while (free_list == INVALID_POINTER);
+  *((void **)block) = free_list;
+  uint32_t free_count = ++span->list_size;
+  int all_deferred_free = (free_count == span->block_count);
+  atomic_store_ptr_release(&span->free_list_deferred, block);
+  if (all_deferred_free) {
+    // Span was completely freed by this block. Due to the INVALID_POINTER spin
+    // lock no other thread can reach this state simultaneously on this span.
+    // Safe to move to owner heap deferred cache
+    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+  }
+}
+
+static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
+  _rpmalloc_stat_inc_free(span->heap, span->size_class);
+  if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
+    // Realign pointer to block start
+    void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
+    uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
+    p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
+  }
+  // Check if block belongs to this heap or if deallocation should be deferred
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  int defer =
+      (span->heap->owner_thread &&
+       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#else
+  int defer =
+      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#endif
+  if (!defer)
+    _rpmalloc_deallocate_direct_small_or_medium(span, p);
+  else
+    _rpmalloc_deallocate_defer_small_or_medium(span, p);
+}
+
+//! Deallocate the given large memory block to the current heap
+static void _rpmalloc_deallocate_large(span_t *span) {
+  rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class");
+  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
+                      !(span->flags & SPAN_FLAG_SUBSPAN),
+                  "Span flag corrupted");
+  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
+                      (span->flags & SPAN_FLAG_SUBSPAN),
+                  "Span flag corrupted");
+  // We must always defer (unless finalizing) if from another heap since we
+  // cannot touch the list or counters of another heap
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  int defer =
+      (span->heap->owner_thread &&
+       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#else
+  int defer =
+      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#endif
+  if (defer) {
+    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+    return;
+  }
+  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
+  --span->heap->full_span_count;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
+#endif
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+  // Decrease counter
+  size_t idx = span->span_count - 1;
+  atomic_decr32(&span->heap->span_use[idx].current);
+#endif
+  heap_t *heap = span->heap;
+  rpmalloc_assert(heap, "No thread heap");
+#if ENABLE_THREAD_CACHE
+  const int set_as_reserved =
+      ((span->span_count > 1) && (heap->span_cache.count == 0) &&
+       !heap->finalize && !heap->spans_reserved);
+#else
+  const int set_as_reserved =
+      ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
+#endif
+  if (set_as_reserved) {
+    heap->span_reserve = span;
+    heap->spans_reserved = span->span_count;
+    if (span->flags & SPAN_FLAG_MASTER) {
+      heap->span_reserve_master = span;
+    } else { // SPAN_FLAG_SUBSPAN
+      span_t *master = (span_t *)pointer_offset(
+          span,
+          -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
+      heap->span_reserve_master = master;
+      rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
+      rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
+                          (int32_t)span->span_count,
+                      "Master span count corrupted");
+    }
+    _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
+  } else {
+    // Insert into cache list
+    _rpmalloc_heap_cache_insert(heap, span);
+  }
+}
+
+//! Deallocate the given huge span
+static void _rpmalloc_deallocate_huge(span_t *span) {
+  rpmalloc_assert(span->heap, "No span heap");
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  int defer =
+      (span->heap->owner_thread &&
+       (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#else
+  int defer =
+      ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
+#endif
+  if (defer) {
+    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+    return;
+  }
+  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
+  --span->heap->full_span_count;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
+#endif
+
+  // Oversized allocation, page count is stored in span_count
+  size_t num_pages = span->span_count;
+  _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
+                  num_pages * _memory_page_size);
+  _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
+}
+
+//! Deallocate the given block
+static void _rpmalloc_deallocate(void *p) {
+  _rpmalloc_stat_add64(&_deallocation_counter, 1);
+  // Grab the span (always at start of span, using span alignment)
+  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
+  if (UNEXPECTED(!span))
+    return;
+  if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
+    _rpmalloc_deallocate_small_or_medium(span, p);
+  else if (span->size_class == SIZE_CLASS_LARGE)
+    _rpmalloc_deallocate_large(span);
+  else
+    _rpmalloc_deallocate_huge(span);
+}
+
+////////////
+///
+/// Reallocation entry points
+///
+//////
+
+static size_t _rpmalloc_usable_size(void *p);
+
+//! Reallocate the given block to the given size
+static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
+                                  size_t oldsize, unsigned int flags) {
+  if (p) {
+    // Grab the span using guaranteed span alignment
+    span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
+    if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
+      // Small/medium sized block
+      rpmalloc_assert(span->span_count == 1, "Span counter corrupted");
+      void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
+      uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
+      uint32_t block_idx = block_offset / span->block_size;
+      void *block =
+          pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
+      if (!oldsize)
+        oldsize =
+            (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
+      if ((size_t)span->block_size >= size) {
+        // Still fits in block, never mind trying to save memory, but preserve
+        // data if alignment changed
+        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
+          memmove(block, p, oldsize);
+        return block;
+      }
+    } else if (span->size_class == SIZE_CLASS_LARGE) {
+      // Large block
+      size_t total_size = size + SPAN_HEADER_SIZE;
+      size_t num_spans = total_size >> _memory_span_size_shift;
+      if (total_size & (_memory_span_mask - 1))
+        ++num_spans;
+      size_t current_spans = span->span_count;
+      void *block = pointer_offset(span, SPAN_HEADER_SIZE);
+      if (!oldsize)
+        oldsize = (current_spans * _memory_span_size) -
+                  (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
+      if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
+        // Still fits in block, never mind trying to save memory, but preserve
+        // data if alignment changed
+        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
+          memmove(block, p, oldsize);
+        return block;
+      }
+    } else {
+      // Oversized block
+      size_t total_size = size + SPAN_HEADER_SIZE;
+      size_t num_pages = total_size >> _memory_page_size_shift;
+      if (total_size & (_memory_page_size - 1))
+        ++num_pages;
+      // Page count is stored in span_count
+      size_t current_pages = span->span_count;
+      void *block = pointer_offset(span, SPAN_HEADER_SIZE);
+      if (!oldsize)
+        oldsize = (current_pages * _memory_page_size) -
+                  (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
+      if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
+        // Still fits in block, never mind trying to save memory, but preserve
+        // data if alignment changed
+        if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
+          memmove(block, p, oldsize);
+        return block;
+      }
+    }
+  } else {
+    oldsize = 0;
+  }
+
+  if (!!(flags & RPMALLOC_GROW_OR_FAIL))
+    return 0;
+
+  // Size is greater than block size, need to allocate a new block and
+  // deallocate the old Avoid hysteresis by overallocating if increase is small
+  // (below 37%)
+  size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
+  size_t new_size =
+      (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
+  void *block = _rpmalloc_allocate(heap, new_size);
+  if (p && block) {
+    if (!(flags & RPMALLOC_NO_PRESERVE))
+      memcpy(block, p, oldsize < new_size ? oldsize : new_size);
+    _rpmalloc_deallocate(p);
+  }
+
+  return block;
+}
+
+static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
+                                          size_t alignment, size_t size,
+                                          size_t oldsize, unsigned int flags) {
+  if (alignment <= SMALL_GRANULARITY)
+    return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
+
+  int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
+  size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
+  if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
+    if (no_alloc || (size >= (usablesize / 2)))
+      return ptr;
+  }
+  // Aligned alloc marks span as having aligned blocks
+  void *block =
+      (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
+  if (EXPECTED(block != 0)) {
+    if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
+      if (!oldsize)
+        oldsize = usablesize;
+      memcpy(block, ptr, oldsize < size ? oldsize : size);
+    }
+    _rpmalloc_deallocate(ptr);
+  }
+  return block;
+}
+
+////////////
+///
+/// Initialization, finalization and utility
+///
+//////
+
+//! Get the usable size of the given block
+static size_t _rpmalloc_usable_size(void *p) {
+  // Grab the span using guaranteed span alignment
+  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
+  if (span->size_class < SIZE_CLASS_COUNT) {
+    // Small/medium block
+    void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
+    return span->block_size -
+           ((size_t)pointer_diff(p, blocks_start) % span->block_size);
+  }
+  if (span->size_class == SIZE_CLASS_LARGE) {
+    // Large block
+    size_t current_spans = span->span_count;
+    return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
+  }
+  // Oversized block, page count is stored in span_count
+  size_t current_pages = span->span_count;
+  return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
+}
+
+//! Adjust and optimize the size class properties for the given class
+static void _rpmalloc_adjust_size_class(size_t iclass) {
+  size_t block_size = _memory_size_class[iclass].block_size;
+  size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
+
+  _memory_size_class[iclass].block_count = (uint16_t)block_count;
+  _memory_size_class[iclass].class_idx = (uint16_t)iclass;
+
+  // Check if previous size classes can be merged
+  if (iclass >= SMALL_CLASS_COUNT) {
+    size_t prevclass = iclass;
+    while (prevclass > 0) {
+      --prevclass;
+      // A class can be merged if number of pages and number of blocks are equal
+      if (_memory_size_class[prevclass].block_count ==
+          _memory_size_class[iclass].block_count)
+        _rpmalloc_memcpy_const(_memory_size_class + prevclass,
+                               _memory_size_class + iclass,
+                               sizeof(_memory_size_class[iclass]));
+      else
+        break;
+    }
+  }
+}
+
+//! Initialize the allocator and setup global data
+extern inline int rpmalloc_initialize(void) {
+  if (_rpmalloc_initialized) {
+    rpmalloc_thread_initialize();
+    return 0;
+  }
+  return rpmalloc_initialize_config(0);
+}
+
+int rpmalloc_initialize_config(const rpmalloc_config_t *config) {
+  if (_rpmalloc_initialized) {
+    rpmalloc_thread_initialize();
+    return 0;
+  }
+  _rpmalloc_initialized = 1;
+
+  if (config)
+    memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
+  else
+    _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t));
+
+  if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
+    _memory_config.memory_map = _rpmalloc_mmap_os;
+    _memory_config.memory_unmap = _rpmalloc_unmap_os;
+  }
+
+#if PLATFORM_WINDOWS
+  SYSTEM_INFO system_info;
+  memset(&system_info, 0, sizeof(system_info));
+  GetSystemInfo(&system_info);
+  _memory_map_granularity = system_info.dwAllocationGranularity;
+#else
+  _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
+#endif
+
+#if RPMALLOC_CONFIGURABLE
+  _memory_page_size = _memory_config.page_size;
+#else
+  _memory_page_size = 0;
+#endif
+  _memory_huge_pages = 0;
+  if (!_memory_page_size) {
+#if PLATFORM_WINDOWS
+    _memory_page_size = system_info.dwPageSize;
+#else
+    _memory_page_size = _memory_map_granularity;
+    if (_memory_config.enable_huge_pages) {
+#if defined(__linux__)
+      size_t huge_page_size = 0;
+      FILE *meminfo = fopen("/proc/meminfo", "r");
+      if (meminfo) {
+        char line[128];
+        while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
+          line[sizeof(line) - 1] = 0;
+          if (strstr(line, "Hugepagesize:"))
+            huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
+        }
+        fclose(meminfo);
+      }
+      if (huge_page_size) {
+        _memory_huge_pages = 1;
+        _memory_page_size = huge_page_size;
+        _memory_map_granularity = huge_page_size;
+      }
+#elif defined(__FreeBSD__)
+      int rc;
+      size_t sz = sizeof(rc);
+
+      if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 &&
+          rc == 1) {
+        static size_t defsize = 2 * 1024 * 1024;
+        int nsize = 0;
+        size_t sizes[4] = {0};
+        _memory_huge_pages = 1;
+        _memory_page_size = defsize;
+        if ((nsize = getpagesizes(sizes, 4)) >= 2) {
+          nsize--;
+          for (size_t csize = sizes[nsize]; nsize >= 0 && csize;
+               --nsize, csize = sizes[nsize]) {
+            //! Unlikely, but as a precaution..
+            rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
+                            "Invalid page size");
+            if (defsize < csize) {
+              _memory_page_size = csize;
+              break;
+            }
+          }
+        }
+        _memory_map_granularity = _memory_page_size;
+      }
+#elif defined(__APPLE__) || defined(__NetBSD__)
+      _memory_huge_pages = 1;
+      _memory_page_size = 2 * 1024 * 1024;
+      _memory_map_granularity = _memory_page_size;
+#endif
+    }
+#endif
+  } else {
+    if (_memory_config.enable_huge_pages)
+      _memory_huge_pages = 1;
+  }
+
+#if PLATFORM_WINDOWS
+  if (_memory_config.enable_huge_pages) {
+    HANDLE token = 0;
+    size_t large_page_minimum = GetLargePageMinimum();
+    if (large_page_minimum)
+      OpenProcessToken(GetCurrentProcess(),
+                       TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
+    if (token) {
+      LUID luid;
+      if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
+        TOKEN_PRIVILEGES token_privileges;
+        memset(&token_privileges, 0, sizeof(token_privileges));
+        token_privileges.PrivilegeCount = 1;
+        token_privileges.Privileges[0].Luid = luid;
+        token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+        if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
+          if (GetLastError() == ERROR_SUCCESS)
+            _memory_huge_pages = 1;
+        }
+      }
+      CloseHandle(token);
+    }
+    if (_memory_huge_pages) {
+      if (large_page_minimum > _memory_page_size)
+        _memory_page_size = large_page_minimum;
+      if (large_page_minimum > _memory_map_granularity)
+        _memory_map_granularity = large_page_minimum;
+    }
+  }
+#endif
+
+  size_t min_span_size = 256;
+  size_t max_page_size;
+#if UINTPTR_MAX > 0xFFFFFFFF
+  max_page_size = 4096ULL * 1024ULL * 1024ULL;
+#else
+  max_page_size = 4 * 1024 * 1024;
+#endif
+  if (_memory_page_size < min_span_size)
+    _memory_page_size = min_span_size;
+  if (_memory_page_size > max_page_size)
+    _memory_page_size = max_page_size;
+  _memory_page_size_shift = 0;
+  size_t page_size_bit = _memory_page_size;
+  while (page_size_bit != 1) {
+    ++_memory_page_size_shift;
+    page_size_bit >>= 1;
+  }
+  _memory_page_size = ((size_t)1 << _memory_page_size_shift);
+
+#if RPMALLOC_CONFIGURABLE
+  if (!_memory_config.span_size) {
+    _memory_span_size = _memory_default_span_size;
+    _memory_span_size_shift = _memory_default_span_size_shift;
+    _memory_span_mask = _memory_default_span_mask;
+  } else {
+    size_t span_size = _memory_config.span_size;
+    if (span_size > (256 * 1024))
+      span_size = (256 * 1024);
+    _memory_span_size = 4096;
+    _memory_span_size_shift = 12;
+    while (_memory_span_size < span_size) {
+      _memory_span_size <<= 1;
+      ++_memory_span_size_shift;
+    }
+    _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
+  }
+#endif
+
+  _memory_span_map_count =
+      (_memory_config.span_map_count ? _memory_config.span_map_count
+                                     : DEFAULT_SPAN_MAP_COUNT);
+  if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
+    _memory_span_map_count = (_memory_page_size / _memory_span_size);
+  if ((_memory_page_size >= _memory_span_size) &&
+      ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
+    _memory_span_map_count = (_memory_page_size / _memory_span_size);
+  _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT)
+                                   ? DEFAULT_SPAN_MAP_COUNT
+                                   : _memory_span_map_count;
+
+  _memory_config.page_size = _memory_page_size;
+  _memory_config.span_size = _memory_span_size;
+  _memory_config.span_map_count = _memory_span_map_count;
+  _memory_config.enable_huge_pages = _memory_huge_pages;
+
+#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) ||          \
+    defined(__TINYC__)
+  if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
+    return -1;
+#endif
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+  fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
+#endif
+
+  // Setup all small and medium size classes
+  size_t iclass = 0;
+  _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
+  _rpmalloc_adjust_size_class(iclass);
+  for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
+    size_t size = iclass * SMALL_GRANULARITY;
+    _memory_size_class[iclass].block_size = (uint32_t)size;
+    _rpmalloc_adjust_size_class(iclass);
+  }
+  // At least two blocks per span, then fall back to large allocations
+  _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
+  if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
+    _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
+  for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
+    size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
+    if (size > _memory_medium_size_limit) {
+      _memory_medium_size_limit =
+          SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY);
+      break;
+    }
+    _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
+    _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
+  }
+
+  _memory_orphan_heaps = 0;
+#if RPMALLOC_FIRST_CLASS_HEAPS
+  _memory_first_class_orphan_heaps = 0;
+#endif
+#if ENABLE_STATISTICS
+  atomic_store32(&_memory_active_heaps, 0);
+  atomic_store32(&_mapped_pages, 0);
+  _mapped_pages_peak = 0;
+  atomic_store32(&_master_spans, 0);
+  atomic_store32(&_mapped_total, 0);
+  atomic_store32(&_unmapped_total, 0);
+  atomic_store32(&_mapped_pages_os, 0);
+  atomic_store32(&_huge_pages_current, 0);
+  _huge_pages_peak = 0;
+#endif
+  memset(_memory_heaps, 0, sizeof(_memory_heaps));
+  atomic_store32_release(&_memory_global_lock, 0);
+
+  rpmalloc_linker_reference();
+
+  // Initialize this thread
+  rpmalloc_thread_initialize();
+  return 0;
+}
+
+//! Finalize the allocator
+void rpmalloc_finalize(void) {
+  rpmalloc_thread_finalize(1);
+  // rpmalloc_dump_statistics(stdout);
+
+  if (_memory_global_reserve) {
+    atomic_add32(&_memory_global_reserve_master->remaining_spans,
+                 -(int32_t)_memory_global_reserve_count);
+    _memory_global_reserve_master = 0;
+    _memory_global_reserve_count = 0;
+    _memory_global_reserve = 0;
+  }
+  atomic_store32_release(&_memory_global_lock, 0);
+
+  // Free all thread caches and fully free spans
+  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
+    heap_t *heap = _memory_heaps[list_idx];
+    while (heap) {
+      heap_t *next_heap = heap->next_heap;
+      heap->finalize = 1;
+      _rpmalloc_heap_global_finalize(heap);
+      heap = next_heap;
+    }
+  }
+
+#if ENABLE_GLOBAL_CACHE
+  // Free global caches
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
+    _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
+#endif
+
+#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
+  pthread_key_delete(_memory_thread_heap);
+#endif
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+  FlsFree(fls_key);
+  fls_key = 0;
+#endif
+#if ENABLE_STATISTICS
+  // If you hit these asserts you probably have memory leaks (perhaps global
+  // scope data doing dynamic allocations) or double frees in your code
+  rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
+  rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
+                  "Memory leak detected");
+#endif
+
+  _rpmalloc_initialized = 0;
+}
+
+//! Initialize thread, assign heap
+extern inline void rpmalloc_thread_initialize(void) {
+  if (!get_thread_heap_raw()) {
+    heap_t *heap = _rpmalloc_heap_allocate(0);
+    if (heap) {
+      _rpmalloc_stat_inc(&_memory_active_heaps);
+      set_thread_heap(heap);
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+      FlsSetValue(fls_key, heap);
+#endif
+    }
+  }
+}
+
+//! Finalize thread, orphan heap
+void rpmalloc_thread_finalize(int release_caches) {
+  heap_t *heap = get_thread_heap_raw();
+  if (heap)
+    _rpmalloc_heap_release_raw(heap, release_caches);
+  set_thread_heap(0);
+#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
+  FlsSetValue(fls_key, 0);
+#endif
+}
+
+int rpmalloc_is_thread_initialized(void) {
+  return (get_thread_heap_raw() != 0) ? 1 : 0;
+}
+
+const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; }
+
+// Extern interface
+
+extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
+#if ENABLE_VALIDATE_ARGS
+  if (size >= MAX_ALLOC_SIZE) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  heap_t *heap = get_thread_heap();
+  return _rpmalloc_allocate(heap, size);
+}
+
+extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
+
+extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) {
+  size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+  int err = SizeTMult(num, size, &total);
+  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#else
+  int err = __builtin_umull_overflow(num, size, &total);
+  if (err || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+#else
+  total = num * size;
+#endif
+  heap_t *heap = get_thread_heap();
+  void *block = _rpmalloc_allocate(heap, total);
+  if (block)
+    memset(block, 0, total);
+  return block;
+}
+
+extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+  if (size >= MAX_ALLOC_SIZE) {
+    errno = EINVAL;
+    return ptr;
+  }
+#endif
+  heap_t *heap = get_thread_heap();
+  return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
+}
+
+extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment,
+                                                  size_t size, size_t oldsize,
+                                                  unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+  if ((size + alignment < size) || (alignment > _memory_page_size)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  heap_t *heap = get_thread_heap();
+  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
+                                      flags);
+}
+
+extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) {
+  heap_t *heap = get_thread_heap();
+  return _rpmalloc_aligned_allocate(heap, alignment, size);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpaligned_calloc(size_t alignment, size_t num, size_t size) {
+  size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+  int err = SizeTMult(num, size, &total);
+  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#else
+  int err = __builtin_umull_overflow(num, size, &total);
+  if (err || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+#else
+  total = num * size;
+#endif
+  void *block = rpaligned_alloc(alignment, total);
+  if (block)
+    memset(block, 0, total);
+  return block;
+}
+
+extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment,
+                                                  size_t size) {
+  return rpaligned_alloc(alignment, size);
+}
+
+extern inline int rpposix_memalign(void **memptr, size_t alignment,
+                                   size_t size) {
+  if (memptr)
+    *memptr = rpaligned_alloc(alignment, size);
+  else
+    return EINVAL;
+  return *memptr ? 0 : ENOMEM;
+}
+
+extern inline size_t rpmalloc_usable_size(void *ptr) {
+  return (ptr ? _rpmalloc_usable_size(ptr) : 0);
+}
+
+extern inline void rpmalloc_thread_collect(void) {}
+
+void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
+  memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
+  heap_t *heap = get_thread_heap_raw();
+  if (!heap)
+    return;
+
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    size_class_t *size_class = _memory_size_class + iclass;
+    span_t *span = heap->size_class[iclass].partial_span;
+    while (span) {
+      size_t free_count = span->list_size;
+      size_t block_count = size_class->block_count;
+      if (span->free_list_limit < block_count)
+        block_count = span->free_list_limit;
+      free_count += (block_count - span->used_count);
+      stats->sizecache += free_count * size_class->block_size;
+      span = span->next;
+    }
+  }
+
+#if ENABLE_THREAD_CACHE
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    span_cache_t *span_cache;
+    if (!iclass)
+      span_cache = &heap->span_cache;
+    else
+      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
+    stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
+  }
+#endif
+
+  span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
+  while (deferred) {
+    if (deferred->size_class != SIZE_CLASS_HUGE)
+      stats->spancache += (size_t)deferred->span_count * _memory_span_size;
+    deferred = (span_t *)deferred->free_list;
+  }
+
+#if ENABLE_STATISTICS
+  stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
+  stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
+
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    stats->span_use[iclass].current =
+        (size_t)atomic_load32(&heap->span_use[iclass].current);
+    stats->span_use[iclass].peak =
+        (size_t)atomic_load32(&heap->span_use[iclass].high);
+    stats->span_use[iclass].to_global =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
+    stats->span_use[iclass].from_global =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
+    stats->span_use[iclass].to_cache =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
+    stats->span_use[iclass].from_cache =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
+    stats->span_use[iclass].to_reserved =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
+    stats->span_use[iclass].from_reserved =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
+    stats->span_use[iclass].map_calls =
+        (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
+  }
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    stats->size_use[iclass].alloc_current =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
+    stats->size_use[iclass].alloc_peak =
+        (size_t)heap->size_class_use[iclass].alloc_peak;
+    stats->size_use[iclass].alloc_total =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
+    stats->size_use[iclass].free_total =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
+    stats->size_use[iclass].spans_to_cache =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
+    stats->size_use[iclass].spans_from_cache =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
+    stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(
+        &heap->size_class_use[iclass].spans_from_reserved);
+    stats->size_use[iclass].map_calls =
+        (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
+  }
+#endif
+}
+
+void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
+  memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
+#if ENABLE_STATISTICS
+  stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
+  stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
+  stats->mapped_total =
+      (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
+  stats->unmapped_total =
+      (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
+  stats->huge_alloc =
+      (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
+  stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
+#endif
+#if ENABLE_GLOBAL_CACHE
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    global_cache_t *cache = &_memory_span_cache[iclass];
+    while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+      _rpmalloc_spin();
+    uint32_t count = cache->count;
+#if ENABLE_UNLIMITED_CACHE
+    span_t *current_span = cache->overflow;
+    while (current_span) {
+      ++count;
+      current_span = current_span->next;
+    }
+#endif
+    atomic_store32_release(&cache->lock, 0);
+    stats->cached += count * (iclass + 1) * _memory_span_size;
+  }
+#endif
+}
+
+#if ENABLE_STATISTICS
+
+static void _memory_heap_dump_statistics(heap_t *heap, void *file) {
+  fprintf(file, "Heap %d stats:\n", heap->id);
+  fprintf(file, "Class   CurAlloc  PeakAlloc   TotAlloc    TotFree  BlkSize "
+                "BlkCount SpansCur SpansPeak  PeakAllocMiB  ToCacheMiB "
+                "FromCacheMiB FromReserveMiB MmapCalls\n");
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
+      continue;
+    fprintf(
+        file,
+        "%3u:  %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu "
+        "%9u\n",
+        (uint32_t)iclass,
+        atomic_load32(&heap->size_class_use[iclass].alloc_current),
+        heap->size_class_use[iclass].alloc_peak,
+        atomic_load32(&heap->size_class_use[iclass].alloc_total),
+        atomic_load32(&heap->size_class_use[iclass].free_total),
+        _memory_size_class[iclass].block_size,
+        _memory_size_class[iclass].block_count,
+        atomic_load32(&heap->size_class_use[iclass].spans_current),
+        heap->size_class_use[iclass].spans_peak,
+        ((size_t)heap->size_class_use[iclass].alloc_peak *
+         (size_t)_memory_size_class[iclass].block_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) *
+         _memory_span_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) *
+         _memory_span_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(
+             &heap->size_class_use[iclass].spans_from_reserved) *
+         _memory_span_size) /
+            (size_t)(1024 * 1024),
+        atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
+  }
+  fprintf(file, "Spans  Current     Peak Deferred  PeakMiB  Cached  ToCacheMiB "
+                "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB "
+                "FromGlobalMiB  MmapCalls\n");
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    if (!atomic_load32(&heap->span_use[iclass].high) &&
+        !atomic_load32(&heap->span_use[iclass].spans_map_calls))
+      continue;
+    fprintf(
+        file,
+        "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n",
+        (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current),
+        atomic_load32(&heap->span_use[iclass].high),
+        atomic_load32(&heap->span_use[iclass].spans_deferred),
+        ((size_t)atomic_load32(&heap->span_use[iclass].high) *
+         (size_t)_memory_span_size * (iclass + 1)) /
+            (size_t)(1024 * 1024),
+#if ENABLE_THREAD_CACHE
+        (unsigned int)(!iclass ? heap->span_cache.count
+                               : heap->span_large_cache[iclass - 1].count),
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) *
+         (iclass + 1) * _memory_span_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) *
+         (iclass + 1) * _memory_span_size) /
+            (size_t)(1024 * 1024),
+#else
+        0, (size_t)0, (size_t)0,
+#endif
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) *
+         (iclass + 1) * _memory_span_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) *
+         (iclass + 1) * _memory_span_size) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) *
+         (size_t)_memory_span_size * (iclass + 1)) /
+            (size_t)(1024 * 1024),
+        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) *
+         (size_t)_memory_span_size * (iclass + 1)) /
+            (size_t)(1024 * 1024),
+        atomic_load32(&heap->span_use[iclass].spans_map_calls));
+  }
+  fprintf(file, "Full spans: %zu\n", heap->full_span_count);
+  fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
+  fprintf(
+      file, "%17zu %17zu\n",
+      (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
+      (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
+}
+
+#endif
+
+void rpmalloc_dump_statistics(void *file) {
+#if ENABLE_STATISTICS
+  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
+    heap_t *heap = _memory_heaps[list_idx];
+    while (heap) {
+      int need_dump = 0;
+      for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT);
+           ++iclass) {
+        if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
+          rpmalloc_assert(
+              !atomic_load32(&heap->size_class_use[iclass].free_total),
+              "Heap statistics counter mismatch");
+          rpmalloc_assert(
+              !atomic_load32(&heap->size_class_use[iclass].spans_map_calls),
+              "Heap statistics counter mismatch");
+          continue;
+        }
+        need_dump = 1;
+      }
+      for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT);
+           ++iclass) {
+        if (!atomic_load32(&heap->span_use[iclass].high) &&
+            !atomic_load32(&heap->span_use[iclass].spans_map_calls))
+          continue;
+        need_dump = 1;
+      }
+      if (need_dump)
+        _memory_heap_dump_statistics(heap, file);
+      heap = heap->next_heap;
+    }
+  }
+  fprintf(file, "Global stats:\n");
+  size_t huge_current =
+      (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
+  size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
+  fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
+  fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024),
+          huge_peak / (size_t)(1024 * 1024));
+
+#if ENABLE_GLOBAL_CACHE
+  fprintf(file, "GlobalCacheMiB\n");
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    global_cache_t *cache = _memory_span_cache + iclass;
+    size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
+
+    size_t global_overflow_cache = 0;
+    span_t *span = cache->overflow;
+    while (span) {
+      global_overflow_cache += iclass * _memory_span_size;
+      span = span->next;
+    }
+    if (global_cache || global_overflow_cache || cache->insert_count ||
+        cache->extract_count)
+      fprintf(file,
+              "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n",
+              iclass + 1, global_cache / (size_t)(1024 * 1024),
+              global_overflow_cache / (size_t)(1024 * 1024),
+              cache->insert_count, cache->extract_count);
+  }
+#endif
+
+  size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
+  size_t mapped_os =
+      (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
+  size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
+  size_t mapped_total =
+      (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
+  size_t unmapped_total =
+      (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
+  fprintf(
+      file,
+      "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
+  fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
+          mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024),
+          mapped_peak / (size_t)(1024 * 1024),
+          mapped_total / (size_t)(1024 * 1024),
+          unmapped_total / (size_t)(1024 * 1024));
+
+  fprintf(file, "\n");
+#if 0
+	int64_t allocated = atomic_load64(&_allocation_counter);
+	int64_t deallocated = atomic_load64(&_deallocation_counter);
+	fprintf(file, "Allocation count: %lli\n", allocated);
+	fprintf(file, "Deallocation count: %lli\n", deallocated);
+	fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
+	fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
+	fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
+#endif
+#endif
+  (void)sizeof(file);
+}
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
+  // Must be a pristine heap from newly mapped memory pages, or else memory
+  // blocks could already be allocated from the heap which would (wrongly) be
+  // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
+  // guaranteed to be pristine from the dedicated orphan list can be used.
+  heap_t *heap = _rpmalloc_heap_allocate(1);
+  rpmalloc_assume(heap != NULL);
+  heap->owner_thread = 0;
+  _rpmalloc_stat_inc(&_memory_active_heaps);
+  return heap;
+}
+
+extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
+  if (heap)
+    _rpmalloc_heap_release(heap, 1, 1);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
+#if ENABLE_VALIDATE_ARGS
+  if (size >= MAX_ALLOC_SIZE) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  return _rpmalloc_allocate(heap, size);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
+                            size_t size) {
+#if ENABLE_VALIDATE_ARGS
+  if (size >= MAX_ALLOC_SIZE) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  return _rpmalloc_aligned_allocate(heap, alignment, size);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
+  return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
+                             size_t num, size_t size) {
+  size_t total;
+#if ENABLE_VALIDATE_ARGS
+#if PLATFORM_WINDOWS
+  int err = SizeTMult(num, size, &total);
+  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#else
+  int err = __builtin_umull_overflow(num, size, &total);
+  if (err || (total >= MAX_ALLOC_SIZE)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+#else
+  total = num * size;
+#endif
+  void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
+  if (block)
+    memset(block, 0, total);
+  return block;
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
+                      unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+  if (size >= MAX_ALLOC_SIZE) {
+    errno = EINVAL;
+    return ptr;
+  }
+#endif
+  return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
+}
+
+extern inline RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
+                              size_t alignment, size_t size,
+                              unsigned int flags) {
+#if ENABLE_VALIDATE_ARGS
+  if ((size + alignment < size) || (alignment > _memory_page_size)) {
+    errno = EINVAL;
+    return 0;
+  }
+#endif
+  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
+}
+
+extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
+  (void)sizeof(heap);
+  _rpmalloc_deallocate(ptr);
+}
+
+extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
+  span_t *span;
+  span_t *next_span;
+
+  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    span = heap->size_class[iclass].partial_span;
+    while (span) {
+      next_span = span->next;
+      _rpmalloc_heap_cache_insert(heap, span);
+      span = next_span;
+    }
+    heap->size_class[iclass].partial_span = 0;
+    span = heap->full_span[iclass];
+    while (span) {
+      next_span = span->next;
+      _rpmalloc_heap_cache_insert(heap, span);
+      span = next_span;
+    }
+
+    span = heap->size_class[iclass].cache;
+    if (span)
+      _rpmalloc_heap_cache_insert(heap, span);
+    heap->size_class[iclass].cache = 0;
+  }
+  memset(heap->size_class, 0, sizeof(heap->size_class));
+  memset(heap->full_span, 0, sizeof(heap->full_span));
+
+  span = heap->large_huge_span;
+  while (span) {
+    next_span = span->next;
+    if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
+      _rpmalloc_deallocate_huge(span);
+    else
+      _rpmalloc_heap_cache_insert(heap, span);
+    span = next_span;
+  }
+  heap->large_huge_span = 0;
+  heap->full_span_count = 0;
+
+#if ENABLE_THREAD_CACHE
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    span_cache_t *span_cache;
+    if (!iclass)
+      span_cache = &heap->span_cache;
+    else
+      span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
+    if (!span_cache->count)
+      continue;
+#if ENABLE_GLOBAL_CACHE
+    _rpmalloc_stat_add64(&heap->thread_to_global,
+                         span_cache->count * (iclass + 1) * _memory_span_size);
+    _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
+                       span_cache->count);
+    _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
+                                        span_cache->count);
+#else
+    for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+      _rpmalloc_span_unmap(span_cache->span[ispan]);
+#endif
+    span_cache->count = 0;
+  }
+#endif
+
+#if ENABLE_STATISTICS
+  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
+    atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
+    atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
+  }
+  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
+    atomic_store32(&heap->span_use[iclass].current, 0);
+  }
+#endif
+}
+
+extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
+  heap_t *prev_heap = get_thread_heap_raw();
+  if (prev_heap != heap) {
+    set_thread_heap(heap);
+    if (prev_heap)
+      rpmalloc_heap_release(prev_heap);
+  }
+}
+
+extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
+  // Grab the span, and then the heap from the span
+  span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
+  if (span) {
+    return span->heap;
+  }
+  return 0;
+}
+
+#endif
+
+#if ENABLE_PRELOAD || ENABLE_OVERRIDE
+
+#include "malloc.c"
+
+#endif
+
+void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); }
diff --git a/llvm/lib/Support/rpmalloc/rpmalloc.h b/llvm/lib/Support/rpmalloc/rpmalloc.h
index 5b7fe1ff4286..3911c53b779b 100644
--- a/llvm/lib/Support/rpmalloc/rpmalloc.h
+++ b/llvm/lib/Support/rpmalloc/rpmalloc.h
@@ -1,428 +1,428 @@
-//===---------------------- rpmalloc.h ------------------*- C -*-=============//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This library provides a cross-platform lock free thread caching malloc
-// implementation in C11.
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__clang__) || defined(__GNUC__)
-#define RPMALLOC_EXPORT __attribute__((visibility("default")))
-#define RPMALLOC_ALLOCATOR
-#if (defined(__clang_major__) && (__clang_major__ < 4)) ||                     \
-    (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD)
-#define RPMALLOC_ATTRIB_MALLOC
-#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
-#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
-#else
-#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
-#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
-#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)                               \
-  __attribute__((alloc_size(count, size)))
-#endif
-#define RPMALLOC_CDECL
-#elif defined(_MSC_VER)
-#define RPMALLOC_EXPORT
-#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
-#define RPMALLOC_ATTRIB_MALLOC
-#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
-#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
-#define RPMALLOC_CDECL __cdecl
-#else
-#define RPMALLOC_EXPORT
-#define RPMALLOC_ALLOCATOR
-#define RPMALLOC_ATTRIB_MALLOC
-#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
-#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
-#define RPMALLOC_CDECL
-#endif
-
-//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce
-//  a very small overhead due to some size calculations not being compile time
-//  constants
-#ifndef RPMALLOC_CONFIGURABLE
-#define RPMALLOC_CONFIGURABLE 0
-#endif
-
-//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_*
-//! functions).
-//  Will introduce a very small overhead to track fully allocated spans in heaps
-#ifndef RPMALLOC_FIRST_CLASS_HEAPS
-#define RPMALLOC_FIRST_CLASS_HEAPS 0
-#endif
-
-//! Flag to rpaligned_realloc to not preserve content in reallocation
-#define RPMALLOC_NO_PRESERVE 1
-//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be
-//! done in-place,
-//  in which case the original pointer is still valid (just like a call to
-//  realloc which failes to allocate a new block).
-#define RPMALLOC_GROW_OR_FAIL 2
-
-typedef struct rpmalloc_global_statistics_t {
-  //! Current amount of virtual memory mapped, all of which might not have been
-  //! committed (only if ENABLE_STATISTICS=1)
-  size_t mapped;
-  //! Peak amount of virtual memory mapped, all of which might not have been
-  //! committed (only if ENABLE_STATISTICS=1)
-  size_t mapped_peak;
-  //! Current amount of memory in global caches for small and medium sizes
-  //! (<32KiB)
-  size_t cached;
-  //! Current amount of memory allocated in huge allocations, i.e larger than
-  //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
-  size_t huge_alloc;
-  //! Peak amount of memory allocated in huge allocations, i.e larger than
-  //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
-  size_t huge_alloc_peak;
-  //! Total amount of memory mapped since initialization (only if
-  //! ENABLE_STATISTICS=1)
-  size_t mapped_total;
-  //! Total amount of memory unmapped since initialization  (only if
-  //! ENABLE_STATISTICS=1)
-  size_t unmapped_total;
-} rpmalloc_global_statistics_t;
-
-typedef struct rpmalloc_thread_statistics_t {
-  //! Current number of bytes available in thread size class caches for small
-  //! and medium sizes (<32KiB)
-  size_t sizecache;
-  //! Current number of bytes available in thread span caches for small and
-  //! medium sizes (<32KiB)
-  size_t spancache;
-  //! Total number of bytes transitioned from thread cache to global cache (only
-  //! if ENABLE_STATISTICS=1)
-  size_t thread_to_global;
-  //! Total number of bytes transitioned from global cache to thread cache (only
-  //! if ENABLE_STATISTICS=1)
-  size_t global_to_thread;
-  //! Per span count statistics (only if ENABLE_STATISTICS=1)
-  struct {
-    //! Currently used number of spans
-    size_t current;
-    //! High water mark of spans used
-    size_t peak;
-    //! Number of spans transitioned to global cache
-    size_t to_global;
-    //! Number of spans transitioned from global cache
-    size_t from_global;
-    //! Number of spans transitioned to thread cache
-    size_t to_cache;
-    //! Number of spans transitioned from thread cache
-    size_t from_cache;
-    //! Number of spans transitioned to reserved state
-    size_t to_reserved;
-    //! Number of spans transitioned from reserved state
-    size_t from_reserved;
-    //! Number of raw memory map calls (not hitting the reserve spans but
-    //! resulting in actual OS mmap calls)
-    size_t map_calls;
-  } span_use[64];
-  //! Per size class statistics (only if ENABLE_STATISTICS=1)
-  struct {
-    //! Current number of allocations
-    size_t alloc_current;
-    //! Peak number of allocations
-    size_t alloc_peak;
-    //! Total number of allocations
-    size_t alloc_total;
-    //! Total number of frees
-    size_t free_total;
-    //! Number of spans transitioned to cache
-    size_t spans_to_cache;
-    //! Number of spans transitioned from cache
-    size_t spans_from_cache;
-    //! Number of spans transitioned from reserved state
-    size_t spans_from_reserved;
-    //! Number of raw memory map calls (not hitting the reserve spans but
-    //! resulting in actual OS mmap calls)
-    size_t map_calls;
-  } size_use[128];
-} rpmalloc_thread_statistics_t;
-
-typedef struct rpmalloc_config_t {
-  //! Map memory pages for the given number of bytes. The returned address MUST
-  //! be
-  //  aligned to the rpmalloc span size, which will always be a power of two.
-  //  Optionally the function can store an alignment offset in the offset
-  //  variable in case it performs alignment and the returned pointer is offset
-  //  from the actual start of the memory region due to this alignment. The
-  //  alignment offset will be passed to the memory unmap function. The
-  //  alignment offset MUST NOT be larger than 65535 (storable in an uint16_t),
-  //  if it is you must use natural alignment to shift it into 16 bits. If you
-  //  set a memory_map function, you must also set a memory_unmap function or
-  //  else the default implementation will be used for both. This function must
-  //  be thread safe, it can be called by multiple threads simultaneously.
-  void *(*memory_map)(size_t size, size_t *offset);
-  //! Unmap the memory pages starting at address and spanning the given number
-  //! of bytes.
-  //  If release is set to non-zero, the unmap is for an entire span range as
-  //  returned by a previous call to memory_map and that the entire range should
-  //  be released. The release argument holds the size of the entire span range.
-  //  If release is set to 0, the unmap is a partial decommit of a subset of the
-  //  mapped memory range. If you set a memory_unmap function, you must also set
-  //  a memory_map function or else the default implementation will be used for
-  //  both. This function must be thread safe, it can be called by multiple
-  //  threads simultaneously.
-  void (*memory_unmap)(void *address, size_t size, size_t offset,
-                       size_t release);
-  //! Called when an assert fails, if asserts are enabled. Will use the standard
-  //! assert()
-  //  if this is not set.
-  void (*error_callback)(const char *message);
-  //! Called when a call to map memory pages fails (out of memory). If this
-  //! callback is
-  //  not set or returns zero the library will return a null pointer in the
-  //  allocation call. If this callback returns non-zero the map call will be
-  //  retried. The argument passed is the number of bytes that was requested in
-  //  the map call. Only used if the default system memory map function is used
-  //  (memory_map callback is not set).
-  int (*map_fail_callback)(size_t size);
-  //! Size of memory pages. The page size MUST be a power of two. All memory
-  //! mapping
-  //  requests to memory_map will be made with size set to a multiple of the
-  //  page size. Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system
-  //  page size is used.
-  size_t page_size;
-  //! Size of a span of memory blocks. MUST be a power of two, and in
-  //! [4096,262144]
-  //  range (unless 0 - set to 0 to use the default span size). Used if
-  //  RPMALLOC_CONFIGURABLE is defined to 1.
-  size_t span_size;
-  //! Number of spans to map at each request to map new virtual memory blocks.
-  //! This can
-  //  be used to minimize the system call overhead at the cost of virtual memory
-  //  address space. The extra mapped pages will not be written until actually
-  //  used, so physical committed memory should not be affected in the default
-  //  implementation. Will be aligned to a multiple of spans that match memory
-  //  page size in case of huge pages.
-  size_t span_map_count;
-  //! Enable use of large/huge pages. If this flag is set to non-zero and page
-  //! size is
-  //  zero, the allocator will try to enable huge pages and auto detect the
-  //  configuration. If this is set to non-zero and page_size is also non-zero,
-  //  the allocator will assume huge pages have been configured and enabled
-  //  prior to initializing the allocator. For Windows, see
-  //  https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
-  //  For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
-  int enable_huge_pages;
-  //! Respectively allocated pages and huge allocated pages names for systems
-  //  supporting it to be able to distinguish among anonymous regions.
-  const char *page_name;
-  const char *huge_page_name;
-} rpmalloc_config_t;
-
-//! Initialize allocator with default configuration
-RPMALLOC_EXPORT int rpmalloc_initialize(void);
-
-//! Initialize allocator with given configuration
-RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config);
-
-//! Get allocator configuration
-RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void);
-
-//! Finalize allocator
-RPMALLOC_EXPORT void rpmalloc_finalize(void);
-
-//! Initialize allocator for calling thread
-RPMALLOC_EXPORT void rpmalloc_thread_initialize(void);
-
-//! Finalize allocator for calling thread
-RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches);
-
-//! Perform deferred deallocations pending for the calling thread heap
-RPMALLOC_EXPORT void rpmalloc_thread_collect(void);
-
-//! Query if allocator is initialized for calling thread
-RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void);
-
-//! Get per-thread statistics
-RPMALLOC_EXPORT void
-rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);
-
-//! Get global statistics
-RPMALLOC_EXPORT void
-rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);
-
-//! Dump all statistics in human readable format to file (should be a FILE*)
-RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file);
-
-//! Allocate a memory block of at least the given size
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
-
-//! Free the given memory block
-RPMALLOC_EXPORT void rpfree(void *ptr);
-
-//! Allocate a memory block of at least the given size and zero initialize it
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
-
-//! Reallocate the given block to at least the given size
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
-
-//! Reallocate the given block to at least the given size and alignment,
-//  with optional control flags (see RPMALLOC_NO_PRESERVE).
-//  Alignment must be a power of two and a multiple of sizeof(void*),
-//  and should ideally be less than memory page size. A caveat of rpmalloc
-//  internals is that this must also be strictly less than the span size
-//  (default 64KiB)
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
-                  unsigned int flags) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
-
-//! Allocate a memory block of at least the given size and alignment.
-//  Alignment must be a power of two and a multiple of sizeof(void*),
-//  and should ideally be less than memory page size. A caveat of rpmalloc
-//  internals is that this must also be strictly less than the span size
-//  (default 64KiB)
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
-
-//! Allocate a memory block of at least the given size and alignment, and zero
-//! initialize it.
-//  Alignment must be a power of two and a multiple of sizeof(void*),
-//  and should ideally be less than memory page size. A caveat of rpmalloc
-//  internals is that this must also be strictly less than the span size
-//  (default 64KiB)
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpaligned_calloc(size_t alignment, size_t num,
-                 size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
-
-//! Allocate a memory block of at least the given size and alignment.
-//  Alignment must be a power of two and a multiple of sizeof(void*),
-//  and should ideally be less than memory page size. A caveat of rpmalloc
-//  internals is that this must also be strictly less than the span size
-//  (default 64KiB)
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
-
-//! Allocate a memory block of at least the given size and alignment.
-//  Alignment must be a power of two and a multiple of sizeof(void*),
-//  and should ideally be less than memory page size. A caveat of rpmalloc
-//  internals is that this must also be strictly less than the span size
-//  (default 64KiB)
-RPMALLOC_EXPORT int rpposix_memalign(void **memptr, size_t alignment,
-                                     size_t size);
-
-//! Query the usable size of the given memory block (from given pointer to the
-//! end of block)
-RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr);
-
-//! Dummy empty function for forcing linker symbol inclusion
-RPMALLOC_EXPORT void rpmalloc_linker_reference(void);
-
-#if RPMALLOC_FIRST_CLASS_HEAPS
-
-//! Heap type
-typedef struct heap_t rpmalloc_heap_t;
-
-//! Acquire a new heap. Will reuse existing released heaps or allocate memory
-//! for a new heap
-//  if none available. Heap API is implemented with the strict assumption that
-//  only one single thread will call heap functions for a given heap at any
-//  given time, no functions are thread safe.
-RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void);
-
-//! Release a heap (does NOT free the memory allocated by the heap, use
-//! rpmalloc_heap_free_all before destroying the heap).
-//  Releasing a heap will enable it to be reused by other threads. Safe to pass
-//  a null pointer.
-RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap);
-
-//! Allocate a memory block of at least the given size using the given heap.
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
-
-//! Allocate a memory block of at least the given size using the given heap. The
-//! returned
-//  block will have the requested alignment. Alignment must be a power of two
-//  and a multiple of sizeof(void*), and should ideally be less than memory page
-//  size. A caveat of rpmalloc internals is that this must also be strictly less
-//  than the span size (default 64KiB).
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
-                            size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
-
-//! Allocate a memory block of at least the given size using the given heap and
-//! zero initialize it.
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num,
-                     size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
-
-//! Allocate a memory block of at least the given size using the given heap and
-//! zero initialize it. The returned
-//  block will have the requested alignment. Alignment must either be zero, or a
-//  power of two and a multiple of sizeof(void*), and should ideally be less
-//  than memory page size. A caveat of rpmalloc internals is that this must also
-//  be strictly less than the span size (default 64KiB).
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
-                             size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
-
-//! Reallocate the given block to at least the given size. The memory block MUST
-//! be allocated
-//  by the same heap given to this function.
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
-rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
-                      unsigned int flags) RPMALLOC_ATTRIB_MALLOC
-    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
-
-//! Reallocate the given block to at least the given size. The memory block MUST
-//! be allocated
-//  by the same heap given to this function. The returned block will have the
-//  requested alignment. Alignment must be either zero, or a power of two and a
-//  multiple of sizeof(void*), and should ideally be less than memory page size.
-//  A caveat of rpmalloc internals is that this must also be strictly less than
-//  the span size (default 64KiB).
-RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc(
-    rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
-    unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4);
-
-//! Free the given memory block from the given heap. The memory block MUST be
-//! allocated
-//  by the same heap given to this function.
-RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);
-
-//! Free all memory allocated by the heap
-RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap);
-
-//! Set the given heap as the current heap for the calling thread. A heap MUST
-//! only be current heap
-//  for a single thread, a heap can never be shared between multiple threads.
-//  The previous current heap for the calling thread is released to be reused by
-//  other threads.
-RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);
-
-//! Returns which heap the given pointer is allocated on
-RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
+//===---------------------- rpmalloc.h ------------------*- C -*-=============//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This library provides a cross-platform lock free thread caching malloc
+// implementation in C11.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__clang__) || defined(__GNUC__)
+#define RPMALLOC_EXPORT __attribute__((visibility("default")))
+#define RPMALLOC_ALLOCATOR
+#if (defined(__clang_major__) && (__clang_major__ < 4)) ||                     \
+    (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD)
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#else
+#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)                               \
+  __attribute__((alloc_size(count, size)))
+#endif
+#define RPMALLOC_CDECL
+#elif defined(_MSC_VER)
+#define RPMALLOC_EXPORT
+#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL __cdecl
+#else
+#define RPMALLOC_EXPORT
+#define RPMALLOC_ALLOCATOR
+#define RPMALLOC_ATTRIB_MALLOC
+#define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
+#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
+#define RPMALLOC_CDECL
+#endif
+
+//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce
+//  a very small overhead due to some size calculations not being compile time
+//  constants
+#ifndef RPMALLOC_CONFIGURABLE
+#define RPMALLOC_CONFIGURABLE 0
+#endif
+
+//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_*
+//! functions).
+//  Will introduce a very small overhead to track fully allocated spans in heaps
+#ifndef RPMALLOC_FIRST_CLASS_HEAPS
+#define RPMALLOC_FIRST_CLASS_HEAPS 0
+#endif
+
+//! Flag to rpaligned_realloc to not preserve content in reallocation
+#define RPMALLOC_NO_PRESERVE 1
+//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be
+//! done in-place,
+//  in which case the original pointer is still valid (just like a call to
+//  realloc which failes to allocate a new block).
+#define RPMALLOC_GROW_OR_FAIL 2
+
+typedef struct rpmalloc_global_statistics_t {
+  //! Current amount of virtual memory mapped, all of which might not have been
+  //! committed (only if ENABLE_STATISTICS=1)
+  size_t mapped;
+  //! Peak amount of virtual memory mapped, all of which might not have been
+  //! committed (only if ENABLE_STATISTICS=1)
+  size_t mapped_peak;
+  //! Current amount of memory in global caches for small and medium sizes
+  //! (<32KiB)
+  size_t cached;
+  //! Current amount of memory allocated in huge allocations, i.e larger than
+  //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
+  size_t huge_alloc;
+  //! Peak amount of memory allocated in huge allocations, i.e larger than
+  //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
+  size_t huge_alloc_peak;
+  //! Total amount of memory mapped since initialization (only if
+  //! ENABLE_STATISTICS=1)
+  size_t mapped_total;
+  //! Total amount of memory unmapped since initialization  (only if
+  //! ENABLE_STATISTICS=1)
+  size_t unmapped_total;
+} rpmalloc_global_statistics_t;
+
+typedef struct rpmalloc_thread_statistics_t {
+  //! Current number of bytes available in thread size class caches for small
+  //! and medium sizes (<32KiB)
+  size_t sizecache;
+  //! Current number of bytes available in thread span caches for small and
+  //! medium sizes (<32KiB)
+  size_t spancache;
+  //! Total number of bytes transitioned from thread cache to global cache (only
+  //! if ENABLE_STATISTICS=1)
+  size_t thread_to_global;
+  //! Total number of bytes transitioned from global cache to thread cache (only
+  //! if ENABLE_STATISTICS=1)
+  size_t global_to_thread;
+  //! Per span count statistics (only if ENABLE_STATISTICS=1)
+  struct {
+    //! Currently used number of spans
+    size_t current;
+    //! High water mark of spans used
+    size_t peak;
+    //! Number of spans transitioned to global cache
+    size_t to_global;
+    //! Number of spans transitioned from global cache
+    size_t from_global;
+    //! Number of spans transitioned to thread cache
+    size_t to_cache;
+    //! Number of spans transitioned from thread cache
+    size_t from_cache;
+    //! Number of spans transitioned to reserved state
+    size_t to_reserved;
+    //! Number of spans transitioned from reserved state
+    size_t from_reserved;
+    //! Number of raw memory map calls (not hitting the reserve spans but
+    //! resulting in actual OS mmap calls)
+    size_t map_calls;
+  } span_use[64];
+  //! Per size class statistics (only if ENABLE_STATISTICS=1)
+  struct {
+    //! Current number of allocations
+    size_t alloc_current;
+    //! Peak number of allocations
+    size_t alloc_peak;
+    //! Total number of allocations
+    size_t alloc_total;
+    //! Total number of frees
+    size_t free_total;
+    //! Number of spans transitioned to cache
+    size_t spans_to_cache;
+    //! Number of spans transitioned from cache
+    size_t spans_from_cache;
+    //! Number of spans transitioned from reserved state
+    size_t spans_from_reserved;
+    //! Number of raw memory map calls (not hitting the reserve spans but
+    //! resulting in actual OS mmap calls)
+    size_t map_calls;
+  } size_use[128];
+} rpmalloc_thread_statistics_t;
+
+typedef struct rpmalloc_config_t {
+  //! Map memory pages for the given number of bytes. The returned address MUST
+  //! be
+  //  aligned to the rpmalloc span size, which will always be a power of two.
+  //  Optionally the function can store an alignment offset in the offset
+  //  variable in case it performs alignment and the returned pointer is offset
+  //  from the actual start of the memory region due to this alignment. The
+  //  alignment offset will be passed to the memory unmap function. The
+  //  alignment offset MUST NOT be larger than 65535 (storable in an uint16_t),
+  //  if it is you must use natural alignment to shift it into 16 bits. If you
+  //  set a memory_map function, you must also set a memory_unmap function or
+  //  else the default implementation will be used for both. This function must
+  //  be thread safe, it can be called by multiple threads simultaneously.
+  void *(*memory_map)(size_t size, size_t *offset);
+  //! Unmap the memory pages starting at address and spanning the given number
+  //! of bytes.
+  //  If release is set to non-zero, the unmap is for an entire span range as
+  //  returned by a previous call to memory_map and that the entire range should
+  //  be released. The release argument holds the size of the entire span range.
+  //  If release is set to 0, the unmap is a partial decommit of a subset of the
+  //  mapped memory range. If you set a memory_unmap function, you must also set
+  //  a memory_map function or else the default implementation will be used for
+  //  both. This function must be thread safe, it can be called by multiple
+  //  threads simultaneously.
+  void (*memory_unmap)(void *address, size_t size, size_t offset,
+                       size_t release);
+  //! Called when an assert fails, if asserts are enabled. Will use the standard
+  //! assert()
+  //  if this is not set.
+  void (*error_callback)(const char *message);
+  //! Called when a call to map memory pages fails (out of memory). If this
+  //! callback is
+  //  not set or returns zero the library will return a null pointer in the
+  //  allocation call. If this callback returns non-zero the map call will be
+  //  retried. The argument passed is the number of bytes that was requested in
+  //  the map call. Only used if the default system memory map function is used
+  //  (memory_map callback is not set).
+  int (*map_fail_callback)(size_t size);
+  //! Size of memory pages. The page size MUST be a power of two. All memory
+  //! mapping
+  //  requests to memory_map will be made with size set to a multiple of the
+  //  page size. Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system
+  //  page size is used.
+  size_t page_size;
+  //! Size of a span of memory blocks. MUST be a power of two, and in
+  //! [4096,262144]
+  //  range (unless 0 - set to 0 to use the default span size). Used if
+  //  RPMALLOC_CONFIGURABLE is defined to 1.
+  size_t span_size;
+  //! Number of spans to map at each request to map new virtual memory blocks.
+  //! This can
+  //  be used to minimize the system call overhead at the cost of virtual memory
+  //  address space. The extra mapped pages will not be written until actually
+  //  used, so physical committed memory should not be affected in the default
+  //  implementation. Will be aligned to a multiple of spans that match memory
+  //  page size in case of huge pages.
+  size_t span_map_count;
+  //! Enable use of large/huge pages. If this flag is set to non-zero and page
+  //! size is
+  //  zero, the allocator will try to enable huge pages and auto detect the
+  //  configuration. If this is set to non-zero and page_size is also non-zero,
+  //  the allocator will assume huge pages have been configured and enabled
+  //  prior to initializing the allocator. For Windows, see
+  //  https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
+  //  For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
+  int enable_huge_pages;
+  //! Respectively allocated pages and huge allocated pages names for systems
+  //  supporting it to be able to distinguish among anonymous regions.
+  const char *page_name;
+  const char *huge_page_name;
+} rpmalloc_config_t;
+
+//! Initialize allocator with default configuration
+RPMALLOC_EXPORT int rpmalloc_initialize(void);
+
+//! Initialize allocator with given configuration
+RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config);
+
+//! Get allocator configuration
+RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void);
+
+//! Finalize allocator
+RPMALLOC_EXPORT void rpmalloc_finalize(void);
+
+//! Initialize allocator for calling thread
+RPMALLOC_EXPORT void rpmalloc_thread_initialize(void);
+
+//! Finalize allocator for calling thread
+RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches);
+
+//! Perform deferred deallocations pending for the calling thread heap
+RPMALLOC_EXPORT void rpmalloc_thread_collect(void);
+
+//! Query if allocator is initialized for calling thread
+RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void);
+
+//! Get per-thread statistics
+RPMALLOC_EXPORT void
+rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);
+
+//! Get global statistics
+RPMALLOC_EXPORT void
+rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);
+
+//! Dump all statistics in human readable format to file (should be a FILE*)
+RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file);
+
+//! Allocate a memory block of at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+
+//! Free the given memory block
+RPMALLOC_EXPORT void rpfree(void *ptr);
+
+//! Allocate a memory block of at least the given size and zero initialize it
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
+
+//! Reallocate the given block to at least the given size
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Reallocate the given block to at least the given size and alignment,
+//  with optional control flags (see RPMALLOC_NO_PRESERVE).
+//  Alignment must be a power of two and a multiple of sizeof(void*),
+//  and should ideally be less than memory page size. A caveat of rpmalloc
+//  internals is that this must also be strictly less than the span size
+//  (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
+                  unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size and alignment.
+//  Alignment must be a power of two and a multiple of sizeof(void*),
+//  and should ideally be less than memory page size. A caveat of rpmalloc
+//  internals is that this must also be strictly less than the span size
+//  (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment, and zero
+//! initialize it.
+//  Alignment must be a power of two and a multiple of sizeof(void*),
+//  and should ideally be less than memory page size. A caveat of rpmalloc
+//  internals is that this must also be strictly less than the span size
+//  (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpaligned_calloc(size_t alignment, size_t num,
+                 size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size and alignment.
+//  Alignment must be a power of two and a multiple of sizeof(void*),
+//  and should ideally be less than memory page size. A caveat of rpmalloc
+//  internals is that this must also be strictly less than the span size
+//  (default 64KiB)
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size and alignment.
+//  Alignment must be a power of two and a multiple of sizeof(void*),
+//  and should ideally be less than memory page size. A caveat of rpmalloc
+//  internals is that this must also be strictly less than the span size
+//  (default 64KiB)
+RPMALLOC_EXPORT int rpposix_memalign(void **memptr, size_t alignment,
+                                     size_t size);
+
+//! Query the usable size of the given memory block (from given pointer to the
+//! end of block)
+RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr);
+
+//! Dummy empty function for forcing linker symbol inclusion
+RPMALLOC_EXPORT void rpmalloc_linker_reference(void);
+
+#if RPMALLOC_FIRST_CLASS_HEAPS
+
+//! Heap type
+typedef struct heap_t rpmalloc_heap_t;
+
+//! Acquire a new heap. Will reuse existing released heaps or allocate memory
+//! for a new heap
+//  if none available. Heap API is implemented with the strict assumption that
+//  only one single thread will call heap functions for a given heap at any
+//  given time, no functions are thread safe.
+RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void);
+
+//! Release a heap (does NOT free the memory allocated by the heap, use
+//! rpmalloc_heap_free_all before destroying the heap).
+//  Releasing a heap will enable it to be reused by other threads. Safe to pass
+//  a null pointer.
+RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap);
+
+//! Allocate a memory block of at least the given size using the given heap.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(2);
+
+//! Allocate a memory block of at least the given size using the given heap. The
+//! returned
+//  block will have the requested alignment. Alignment must be a power of two
+//  and a multiple of sizeof(void*), and should ideally be less than memory page
+//  size. A caveat of rpmalloc internals is that this must also be strictly less
+//  than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
+                            size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Allocate a memory block of at least the given size using the given heap and
+//! zero initialize it.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num,
+                     size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Allocate a memory block of at least the given size using the given heap and
+//! zero initialize it. The returned
+//  block will have the requested alignment. Alignment must either be zero, or a
+//  power of two and a multiple of sizeof(void*), and should ideally be less
+//  than memory page size. A caveat of rpmalloc internals is that this must also
+//  be strictly less than the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
+                             size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST
+//! be allocated
+//  by the same heap given to this function.
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
+rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
+                      unsigned int flags) RPMALLOC_ATTRIB_MALLOC
+    RPMALLOC_ATTRIB_ALLOC_SIZE(3);
+
+//! Reallocate the given block to at least the given size. The memory block MUST
+//! be allocated
+//  by the same heap given to this function. The returned block will have the
+//  requested alignment. Alignment must be either zero, or a power of two and a
+//  multiple of sizeof(void*), and should ideally be less than memory page size.
+//  A caveat of rpmalloc internals is that this must also be strictly less than
+//  the span size (default 64KiB).
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc(
+    rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
+    unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4);
+
+//! Free the given memory block from the given heap. The memory block MUST be
+//! allocated
+//  by the same heap given to this function.
+RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);
+
+//! Free all memory allocated by the heap
+RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap);
+
+//! Set the given heap as the current heap for the calling thread. A heap MUST
+//! only be current heap
+//  for a single thread, a heap can never be shared between multiple threads.
+//  The previous current heap for the calling thread is released to be reused by
+//  other threads.
+RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);
+
+//! Returns which heap the given pointer is allocated on
+RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/llvm/lib/Support/rpmalloc/rpnew.h b/llvm/lib/Support/rpmalloc/rpnew.h
index a18f0799d56d..d8303c6f9565 100644
--- a/llvm/lib/Support/rpmalloc/rpnew.h
+++ b/llvm/lib/Support/rpmalloc/rpnew.h
@@ -1,113 +1,113 @@
-//===-------------------------- rpnew.h -----------------*- C -*-=============//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This library provides a cross-platform lock free thread caching malloc
-// implementation in C11.
-//
-//===----------------------------------------------------------------------===//
-
-#ifdef __cplusplus
-
-#include <new>
-#include <rpmalloc.h>
-
-#ifndef __CRTDECL
-#define __CRTDECL
-#endif
-
-extern void __CRTDECL operator delete(void *p) noexcept { rpfree(p); }
-
-extern void __CRTDECL operator delete[](void *p) noexcept { rpfree(p); }
-
-extern void *__CRTDECL operator new(std::size_t size) noexcept(false) {
-  return rpmalloc(size);
-}
-
-extern void *__CRTDECL operator new[](std::size_t size) noexcept(false) {
-  return rpmalloc(size);
-}
-
-extern void *__CRTDECL operator new(std::size_t size,
-                                    const std::nothrow_t &tag) noexcept {
-  (void)sizeof(tag);
-  return rpmalloc(size);
-}
-
-extern void *__CRTDECL operator new[](std::size_t size,
-                                      const std::nothrow_t &tag) noexcept {
-  (void)sizeof(tag);
-  return rpmalloc(size);
-}
-
-#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
-
-extern void __CRTDECL operator delete(void *p, std::size_t size) noexcept {
-  (void)sizeof(size);
-  rpfree(p);
-}
-
-extern void __CRTDECL operator delete[](void *p, std::size_t size) noexcept {
-  (void)sizeof(size);
-  rpfree(p);
-}
-
-#endif
-
-#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
-
-extern void __CRTDECL operator delete(void *p,
-                                      std::align_val_t align) noexcept {
-  (void)sizeof(align);
-  rpfree(p);
-}
-
-extern void __CRTDECL operator delete[](void *p,
-                                        std::align_val_t align) noexcept {
-  (void)sizeof(align);
-  rpfree(p);
-}
-
-extern void __CRTDECL operator delete(void *p, std::size_t size,
-                                      std::align_val_t align) noexcept {
-  (void)sizeof(size);
-  (void)sizeof(align);
-  rpfree(p);
-}
-
-extern void __CRTDECL operator delete[](void *p, std::size_t size,
-                                        std::align_val_t align) noexcept {
-  (void)sizeof(size);
-  (void)sizeof(align);
-  rpfree(p);
-}
-
-extern void *__CRTDECL operator new(std::size_t size,
-                                    std::align_val_t align) noexcept(false) {
-  return rpaligned_alloc(static_cast<size_t>(align), size);
-}
-
-extern void *__CRTDECL operator new[](std::size_t size,
-                                      std::align_val_t align) noexcept(false) {
-  return rpaligned_alloc(static_cast<size_t>(align), size);
-}
-
-extern void *__CRTDECL operator new(std::size_t size, std::align_val_t align,
-                                    const std::nothrow_t &tag) noexcept {
-  (void)sizeof(tag);
-  return rpaligned_alloc(static_cast<size_t>(align), size);
-}
-
-extern void *__CRTDECL operator new[](std::size_t size, std::align_val_t align,
-                                      const std::nothrow_t &tag) noexcept {
-  (void)sizeof(tag);
-  return rpaligned_alloc(static_cast<size_t>(align), size);
-}
-
-#endif
-
-#endif
+//===-------------------------- rpnew.h -----------------*- C -*-=============//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This library provides a cross-platform lock free thread caching malloc
+// implementation in C11.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __cplusplus
+
+#include <new>
+#include <rpmalloc.h>
+
+#ifndef __CRTDECL
+#define __CRTDECL
+#endif
+
+extern void __CRTDECL operator delete(void *p) noexcept { rpfree(p); }
+
+extern void __CRTDECL operator delete[](void *p) noexcept { rpfree(p); }
+
+extern void *__CRTDECL operator new(std::size_t size) noexcept(false) {
+  return rpmalloc(size);
+}
+
+extern void *__CRTDECL operator new[](std::size_t size) noexcept(false) {
+  return rpmalloc(size);
+}
+
+extern void *__CRTDECL operator new(std::size_t size,
+                                    const std::nothrow_t &tag) noexcept {
+  (void)sizeof(tag);
+  return rpmalloc(size);
+}
+
+extern void *__CRTDECL operator new[](std::size_t size,
+                                      const std::nothrow_t &tag) noexcept {
+  (void)sizeof(tag);
+  return rpmalloc(size);
+}
+
+#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
+
+extern void __CRTDECL operator delete(void *p, std::size_t size) noexcept {
+  (void)sizeof(size);
+  rpfree(p);
+}
+
+extern void __CRTDECL operator delete[](void *p, std::size_t size) noexcept {
+  (void)sizeof(size);
+  rpfree(p);
+}
+
+#endif
+
+#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+
+extern void __CRTDECL operator delete(void *p,
+                                      std::align_val_t align) noexcept {
+  (void)sizeof(align);
+  rpfree(p);
+}
+
+extern void __CRTDECL operator delete[](void *p,
+                                        std::align_val_t align) noexcept {
+  (void)sizeof(align);
+  rpfree(p);
+}
+
+extern void __CRTDECL operator delete(void *p, std::size_t size,
+                                      std::align_val_t align) noexcept {
+  (void)sizeof(size);
+  (void)sizeof(align);
+  rpfree(p);
+}
+
+extern void __CRTDECL operator delete[](void *p, std::size_t size,
+                                        std::align_val_t align) noexcept {
+  (void)sizeof(size);
+  (void)sizeof(align);
+  rpfree(p);
+}
+
+extern void *__CRTDECL operator new(std::size_t size,
+                                    std::align_val_t align) noexcept(false) {
+  return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void *__CRTDECL operator new[](std::size_t size,
+                                      std::align_val_t align) noexcept(false) {
+  return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void *__CRTDECL operator new(std::size_t size, std::align_val_t align,
+                                    const std::nothrow_t &tag) noexcept {
+  (void)sizeof(tag);
+  return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+extern void *__CRTDECL operator new[](std::size_t size, std::align_val_t align,
+                                      const std::nothrow_t &tag) noexcept {
+  (void)sizeof(tag);
+  return rpaligned_alloc(static_cast<size_t>(align), size);
+}
+
+#endif
+
+#endif
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 9844fd394aa4..8ea31401121b 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -1,38 +1,38 @@
-//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++
-//-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-//===----------------------------------------------------------------------===//
-
-#include "DirectXTargetTransformInfo.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsDirectX.h"
-
-using namespace llvm;
-
-bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
-                                                        unsigned ScalarOpdIdx) {
-  switch (ID) {
-  case Intrinsic::dx_wave_readlane:
-    return ScalarOpdIdx == 1;
-  default:
-    return false;
-  }
-}
-
-bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
-    Intrinsic::ID ID) const {
-  switch (ID) {
-  case Intrinsic::dx_frac:
-  case Intrinsic::dx_rsqrt:
-  case Intrinsic::dx_wave_readlane:
-    return true;
-  default:
-    return false;
-  }
-}
+//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXTargetTransformInfo.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+
+using namespace llvm;
+
+bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+                                                        unsigned ScalarOpdIdx) {
+  switch (ID) {
+  case Intrinsic::dx_wave_readlane:
+    return ScalarOpdIdx == 1;
+  default:
+    return false;
+  }
+}
+
+bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
+    Intrinsic::ID ID) const {
+  switch (ID) {
+  case Intrinsic::dx_frac:
+  case Intrinsic::dx_rsqrt:
+  case Intrinsic::dx_wave_readlane:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 60ac58f824ed..fbd2f47d2769 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20385,11 +20385,11 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       if (VT.isVector())
         break;
       if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
-        return std::make_pair(0U, &RISCV::GPRF16RegClass);
+        return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
       if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
-        return std::make_pair(0U, &RISCV::GPRF32RegClass);
+        return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
       if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
-        return std::make_pair(0U, &RISCV::GPRPairRegClass);
+        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
       return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
     case 'f':
       if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 250f3c10f309..685f04213afa 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -661,6 +661,7 @@ def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17),
                                                 (sequence "X%u_H", 0, 4))>;
 def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15),
                                                  (sequence "X%u_H", 8, 9))>;
+def GPRF16NoX0 : RISCVRegisterClass<[f16], 16, (sub GPRF16, X0_H)>;
 
 def GPRF32 : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 17),
                                                 (sequence "X%u_W", 5, 7),
@@ -721,6 +722,8 @@ def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add
 def GPRPairC : RISCVRegisterClass<[XLenPairFVT], 64, (add
   X10_X11, X12_X13, X14_X15, X8_X9
 )>;
+
+def GPRPairNoX0 : RISCVRegisterClass<[XLenPairFVT], 64, (sub GPRPair, X0_Pair)>;
 } // let RegInfos = XLenPairRI, DecoderMethod = "DecodeGPRPairRegisterClass"
 
 // The register class is added for inline assembly for vector mask types.
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index f4e98e576379..fc4355af5af6 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMVectorize
   SandboxVectorizer/Passes/RegionsFromMetadata.cpp
   SandboxVectorizer/SandboxVectorizer.cpp
   SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
+  SandboxVectorizer/Scheduler.cpp
   SandboxVectorizer/SeedCollector.cpp
   SLPVectorizer.cpp
   Vectorize.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3e8bc1451f62..857efbdf687c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1427,12 +1427,9 @@ public:
     // Override forced styles if needed.
     // FIXME: use actual opcode/data type for analysis here.
     // FIXME: Investigate opportunity for fixed vector factor.
-    bool EVLIsLegal =
-        IsScalableVF && UserIC <= 1 &&
-        TTI.hasActiveVectorLength(0, nullptr, Align()) &&
-        !EnableVPlanNativePath &&
-        // FIXME: implement support for max safe dependency distance.
-        Legal->isSafeForAnyVectorWidth();
+    bool EVLIsLegal = UserIC <= 1 &&
+                      TTI.hasActiveVectorLength(0, nullptr, Align()) &&
+                      !EnableVPlanNativePath;
     if (!EVLIsLegal) {
       // If for some reason EVL mode is unsupported, fallback to
       // DataWithoutLaneMask to try to vectorize the loop with folded tail
@@ -1457,6 +1454,15 @@ public:
     return getTailFoldingStyle() != TailFoldingStyle::None;
   }
 
+  /// Return maximum safe number of elements to be processed per vector
+  /// iteration, which do not prevent store-load forwarding and are safe with
+  /// regard to the memory dependencies. Required for EVL-based VPlans to
+  /// correctly calculate AVL (application vector length) as min(remaining AVL,
+  /// MaxSafeElements).
+  /// TODO: need to consider adjusting cost model to use this value as a
+  /// vectorization factor for EVL-based vectorization.
+  std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
+
   /// Returns true if the instructions in this block requires predication
   /// for any reason, e.g. because tail folding now requires a predicate
   /// or because the block in the original loop was predicated.
@@ -1608,6 +1614,12 @@ private:
   /// true if scalable vectorization is supported and enabled.
   std::optional<bool> IsScalableVectorizationAllowed;
 
+  /// Maximum safe number of elements to be processed per vector iteration,
+  /// which do not prevent store-load forwarding and are safe with regard to the
+  /// memory dependencies. Required for EVL-based veectorization, where this
+  /// value is used as the upper bound of the safe AVL.
+  std::optional<unsigned> MaxSafeElements;
+
   /// A map holding scalar costs for different vectorization factors. The
   /// presence of a cost for an instruction in the mapping indicates that the
   /// instruction will be scalarized when vectorizing with the associated
@@ -2435,12 +2447,26 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
   };
 
   TailFoldingStyle Style = Cost->getTailFoldingStyle();
-  if (Style == TailFoldingStyle::None)
-    CheckMinIters =
-        Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
-  else if (VF.isScalable() &&
-           !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
-           Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
+  if (Style == TailFoldingStyle::None) {
+    Value *Step = CreateStep();
+    ScalarEvolution &SE = *PSE.getSE();
+    // TODO: Emit unconditional branch to vector preheader instead of
+    // conditional branch with known condition.
+    const SCEV *TripCountSCEV = SE.applyLoopGuards(SE.getSCEV(Count), OrigLoop);
+    // Check if the trip count is < the step.
+    if (SE.isKnownPredicate(P, TripCountSCEV, SE.getSCEV(Step))) {
+      // TODO: Ensure step is at most the trip count when determining max VF and
+      // UF, w/o tail folding.
+      CheckMinIters = Builder.getTrue();
+    } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
+                                    TripCountSCEV, SE.getSCEV(Step))) {
+      // Generate the minimum iteration check only if we cannot prove the
+      // check is known to be true, or known to be false.
+      CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
+    } // else step known to be < trip count, use CheckMinIters preset to false.
+  } else if (VF.isScalable() &&
+             !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
+             Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
     // vscale is not necessarily a power-of-2, which means we cannot guarantee
     // an overflow to zero when updating induction variables and so an
     // additional overflow check is required before entering the vector loop.
@@ -2450,8 +2476,18 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
         ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
     Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
 
+    Value *Step = CreateStep();
+#ifndef NDEBUG
+    ScalarEvolution &SE = *PSE.getSE();
+    const SCEV *TC2OverflowSCEV = SE.applyLoopGuards(SE.getSCEV(LHS), OrigLoop);
+    assert(
+        !isIndvarOverflowCheckKnownFalse(Cost, VF * UF) &&
+        !SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT),
+                             TC2OverflowSCEV, SE.getSCEV(Step)) &&
+        "unexpectedly proved overflow check to be known");
+#endif
     // Don't execute the vector loop if (UMax - n) < (VF * UF).
-    CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
+    CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
   }
 
   // Create new preheader for vector loop.
@@ -3858,6 +3894,8 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
 
   auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
   auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
+  if (!Legal->isSafeForAnyVectorWidth())
+    this->MaxSafeElements = MaxSafeElements;
 
   LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
                     << ".\n");
@@ -8686,8 +8724,8 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       VPlanTransforms::optimize(*Plan);
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
-      if (CM.foldTailWithEVL() &&
-          !VPlanTransforms::tryAddExplicitVectorLength(*Plan))
+      if (CM.foldTailWithEVL() && !VPlanTransforms::tryAddExplicitVectorLength(
+                                      *Plan, CM.getMaxSafeElements()))
         break;
       assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
       VPlans.push_back(std::move(Plan));
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ba70ab1e5e14..e1aa6127ac03 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -291,6 +291,8 @@ getFloorFullVectorNumberOfElements(const TargetTransformInfo &TTI, Type *Ty,
   if (NumParts == 0 || NumParts >= Sz)
     return bit_floor(Sz);
   unsigned RegVF = bit_ceil(divideCeil(Sz, NumParts));
+  if (RegVF > Sz)
+    return bit_floor(Sz);
   return (Sz / RegVF) * RegVF;
 }
 
@@ -1505,6 +1507,12 @@ public:
   /// vectorizable. We do not vectorize such trees.
   bool isTreeTinyAndNotFullyVectorizable(bool ForReduction = false) const;
 
+  /// Checks if the graph and all its subgraphs cannot be better vectorized.
+  /// It may happen, if all gather nodes are loads and they cannot be
+  /// "clusterized". In this case even subgraphs cannot be vectorized more
+  /// effectively than the base graph.
+  bool isTreeNotExtendable() const;
+
   /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
   /// can be load combined in the backend. Load combining may not be allowed in
   /// the IR optimizer, so we do not want to alter the pattern. For example,
@@ -3047,7 +3055,9 @@ private:
   /// vector loads/masked gathers instead of regular gathers. Later these loads
   /// are reshufled to build final gathered nodes.
   void tryToVectorizeGatheredLoads(
-      ArrayRef<SmallVector<std::pair<LoadInst *, int>>> GatheredLoads);
+      const SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
+                           SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
+                           8> &GatheredLoads);
 
   /// Reorder commutative or alt operands to get better probability of
   /// generating vectorized code.
@@ -3059,7 +3069,7 @@ private:
   /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
   /// users of \p TE and collects the stores. It returns the map from the store
   /// pointers to the collected stores.
-  DenseMap<Value *, SmallVector<StoreInst *>>
+  SmallVector<SmallVector<StoreInst *>>
   collectUserStores(const BoUpSLP::TreeEntry *TE) const;
 
   /// Helper for `findExternalStoreUsersReorderIndices()`. It checks if the
@@ -4657,7 +4667,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
 static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
                                   const TargetLibraryInfo &TLI,
                                   bool CompareOpcodes = true) {
-  if (getUnderlyingObject(Ptr1) != getUnderlyingObject(Ptr2))
+  if (getUnderlyingObject(Ptr1, RecursionMaxDepth) !=
+      getUnderlyingObject(Ptr2, RecursionMaxDepth))
     return false;
   auto *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
   auto *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
@@ -5177,30 +5188,40 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   return LoadsState::Gather;
 }
 
-static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
+static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
+                                   ArrayRef<BasicBlock *> BBs, Type *ElemTy,
                                    const DataLayout &DL, ScalarEvolution &SE,
                                    SmallVectorImpl<unsigned> &SortedIndices) {
-  assert(llvm::all_of(
-             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
-         "Expected list of pointer operands.");
+  assert(
+      all_of(VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
+      "Expected list of pointer operands.");
   // Map from bases to a vector of (Ptr, Offset, OrigIdx), which we insert each
   // Ptr into, sort and return the sorted indices with values next to one
   // another.
-  MapVector<Value *, SmallVector<std::tuple<Value *, int, unsigned>>> Bases;
-  Bases[VL[0]].push_back(std::make_tuple(VL[0], 0U, 0U));
-
-  unsigned Cnt = 1;
-  for (Value *Ptr : VL.drop_front()) {
-    bool Found = any_of(Bases, [&](auto &Base) {
-      std::optional<int> Diff =
-          getPointersDiff(ElemTy, Base.first, ElemTy, Ptr, DL, SE,
-                          /*StrictCheck=*/true);
-      if (!Diff)
-        return false;
+  SmallMapVector<std::pair<BasicBlock *, Value *>,
+                 SmallVector<SmallVector<std::tuple<Value *, int, unsigned>>>, 8>
+      Bases;
+  Bases
+      .try_emplace(std::make_pair(
+          BBs.front(), getUnderlyingObject(VL.front(), RecursionMaxDepth)))
+      .first->second.emplace_back().emplace_back(VL.front(), 0U, 0U);
 
-      Base.second.emplace_back(Ptr, *Diff, Cnt++);
-      return true;
-    });
+  SortedIndices.clear();
+  for (auto [Cnt, Ptr] : enumerate(VL.drop_front())) {
+    auto Key = std::make_pair(BBs[Cnt + 1],
+                              getUnderlyingObject(Ptr, RecursionMaxDepth));
+    bool Found = any_of(Bases.try_emplace(Key).first->second,
+                        [&, &Cnt = Cnt, &Ptr = Ptr](auto &Base) {
+                          std::optional<int> Diff = getPointersDiff(
+                              ElemTy, std::get<0>(Base.front()), ElemTy,
+                              Ptr, DL, SE,
+                              /*StrictCheck=*/true);
+                          if (!Diff)
+                            return false;
+
+                          Base.emplace_back(Ptr, *Diff, Cnt + 1);
+                          return true;
+                        });
 
     if (!Found) {
       // If we haven't found enough to usefully cluster, return early.
@@ -5208,71 +5229,39 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
         return false;
 
       // Not found already - add a new Base
-      Bases[Ptr].emplace_back(Ptr, 0, Cnt++);
+      Bases.find(Key)->second.emplace_back().emplace_back(Ptr, 0, Cnt + 1);
     }
   }
 
   // For each of the bases sort the pointers by Offset and check if any of the
   // base become consecutively allocated.
-  bool AnyConsecutive = false;
   for (auto &Base : Bases) {
-    auto &Vec = Base.second;
-    if (Vec.size() > 1) {
-      llvm::stable_sort(Vec, [](const std::tuple<Value *, int, unsigned> &X,
-                                const std::tuple<Value *, int, unsigned> &Y) {
-        return std::get<1>(X) < std::get<1>(Y);
-      });
-      int InitialOffset = std::get<1>(Vec[0]);
-      AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](const auto &P) {
-        return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
-      });
+    for (auto &Vec : Base.second) {
+      if (Vec.size() > 1) {
+        stable_sort(Vec, [](const std::tuple<Value *, int, unsigned> &X,
+                            const std::tuple<Value *, int, unsigned> &Y) {
+          return std::get<1>(X) < std::get<1>(Y);
+        });
+        int InitialOffset = std::get<1>(Vec[0]);
+        bool AnyConsecutive =
+            all_of(enumerate(Vec), [InitialOffset](const auto &P) {
+              return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
+            });
+        // Fill SortedIndices array only if it looks worth-while to sort the
+        // ptrs.
+        if (!AnyConsecutive)
+          return false;
+      }
     }
-  }
-
-  // Fill SortedIndices array only if it looks worth-while to sort the ptrs.
-  SortedIndices.clear();
-  if (!AnyConsecutive)
-    return false;
-
-  // If we have a better order, also sort the base pointers by increasing
-  // (variable) values if possible, to try and keep the order more regular. In
-  // order to create a valid strict-weak order we cluster by the Root of gep
-  // chains and sort within each.
-  SmallVector<std::tuple<Value *, Value *, Value *>> SortedBases;
-  for (auto &Base : Bases) {
-    Value *Strip = Base.first->stripInBoundsConstantOffsets();
-    Value *Root = Strip;
-    while (auto *Gep = dyn_cast<GetElementPtrInst>(Root))
-      Root = Gep->getOperand(0);
-    SortedBases.emplace_back(Base.first, Strip, Root);
-  }
-  auto *Begin = SortedBases.begin();
-  auto *End = SortedBases.end();
-  while (Begin != End) {
-    Value *Root = std::get<2>(*Begin);
-    auto *Mid = std::stable_partition(
-        Begin, End, [&Root](auto V) { return std::get<2>(V) == Root; });
-    DenseMap<Value *, DenseMap<Value *, bool>> LessThan;
-    for (auto *I = Begin; I < Mid; ++I)
-      LessThan.try_emplace(std::get<1>(*I));
-    for (auto *I = Begin; I < Mid; ++I) {
-      Value *V = std::get<1>(*I);
-      while (auto *Gep = dyn_cast<GetElementPtrInst>(V)) {
-        V = Gep->getOperand(0);
-        if (LessThan.contains(V))
-          LessThan[V][std::get<1>(*I)] = true;
-      }
-    }
-    std::stable_sort(Begin, Mid, [&LessThan](auto &V1, auto &V2) {
-      return LessThan[std::get<1>(V1)][std::get<1>(V2)];
+    sort(Base.second, [](const auto &V1, const auto &V2) {
+      return std::get<2>(V1.front()) < std::get<2>(V2.front());
     });
-    Begin = Mid;
   }
 
-  // Collect the final order of sorted indices
-  for (auto Base : SortedBases)
-    for (auto &T : Bases[std::get<0>(Base)])
-      SortedIndices.push_back(std::get<2>(T));
+  for (auto &T : Bases)
+    for (const auto &Vec : T.second)
+      for (const auto &P : Vec)
+        SortedIndices.push_back(std::get<2>(P));
 
   assert(SortedIndices.size() == VL.size() &&
          "Expected SortedIndices to be the size of VL");
@@ -5286,15 +5275,18 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
 
   SmallVector<Value *> Ptrs;
   Ptrs.reserve(TE.Scalars.size());
+  SmallVector<BasicBlock *> BBs;
+  BBs.reserve(TE.Scalars.size());
   for (Value *V : TE.Scalars) {
     auto *L = dyn_cast<LoadInst>(V);
     if (!L || !L->isSimple())
       return std::nullopt;
     Ptrs.push_back(L->getPointerOperand());
+    BBs.push_back(L->getParent());
   }
 
   BoUpSLP::OrdersType Order;
-  if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order))
+  if (clusterSortPtrAccesses(Ptrs, BBs, ScalarTy, *DL, *SE, Order))
     return std::move(Order);
   return std::nullopt;
 }
@@ -5662,7 +5654,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
     }
     // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars
     // has been auditted for correctness with non-power-of-two vectors.
-    if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI))
+    if (!VectorizeNonPowerOf2 || !TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI))
       if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
         return CurrentOrder;
   }
@@ -6393,13 +6385,15 @@ void BoUpSLP::buildExternalUses(
   }
 }
 
-DenseMap<Value *, SmallVector<StoreInst *>>
+SmallVector<SmallVector<StoreInst *>>
 BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
-  DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap;
+  SmallDenseMap<std::tuple<BasicBlock *, Type *, Value *>,
+                SmallVector<StoreInst *>, 8>
+      PtrToStoresMap;
   for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) {
     Value *V = TE->Scalars[Lane];
     // Don't iterate over the users of constant data.
-    if (isa<ConstantData>(V))
+    if (!isa<Instruction>(V))
       continue;
     // To save compilation time we don't visit if we have too many users.
     if (V->hasNUsesOrMore(UsesLimit))
@@ -6417,25 +6411,34 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
       if (getTreeEntry(U))
         continue;
 
-      Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
-      auto &StoresVec = PtrToStoresMap[Ptr];
+      Value *Ptr =
+          getUnderlyingObject(SI->getPointerOperand(), RecursionMaxDepth);
+      auto &StoresVec = PtrToStoresMap[{SI->getParent(),
+                                        SI->getValueOperand()->getType(), Ptr}];
       // For now just keep one store per pointer object per lane.
       // TODO: Extend this to support multiple stores per pointer per lane
       if (StoresVec.size() > Lane)
         continue;
-      // Skip if in different BBs.
-      if (!StoresVec.empty() &&
-          SI->getParent() != StoresVec.back()->getParent())
-        continue;
-      // Make sure that the stores are of the same type.
-      if (!StoresVec.empty() &&
-          SI->getValueOperand()->getType() !=
-              StoresVec.back()->getValueOperand()->getType())
-        continue;
+      if (!StoresVec.empty()) {
+        std::optional<int> Diff = getPointersDiff(
+            SI->getValueOperand()->getType(), SI->getPointerOperand(),
+            SI->getValueOperand()->getType(),
+            StoresVec.front()->getPointerOperand(), *DL, *SE,
+            /*StrictCheck=*/true);
+        // We failed to compare the pointers so just abandon this store.
+        if (!Diff)
+          continue;
+      }
       StoresVec.push_back(SI);
     }
   }
-  return PtrToStoresMap;
+  SmallVector<SmallVector<StoreInst *>> Res(PtrToStoresMap.size());
+  unsigned I = 0;
+  for (auto &P : PtrToStoresMap) {
+    Res[I].swap(P.second);
+    ++I;
+  }
+  return Res;
 }
 
 bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
@@ -6445,9 +6448,9 @@ bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
 
   // To avoid calling getPointersDiff() while sorting we create a vector of
   // pairs {store, offset from first} and sort this instead.
-  SmallVector<std::pair<StoreInst *, int>> StoreOffsetVec(StoresVec.size());
+  SmallVector<std::pair<int, unsigned>> StoreOffsetVec;
   StoreInst *S0 = StoresVec[0];
-  StoreOffsetVec[0] = {S0, 0};
+  StoreOffsetVec.emplace_back(0, 0);
   Type *S0Ty = S0->getValueOperand()->getType();
   Value *S0Ptr = S0->getPointerOperand();
   for (unsigned Idx : seq<unsigned>(1, StoresVec.size())) {
@@ -6456,41 +6459,36 @@ bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
         getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(),
                         SI->getPointerOperand(), *DL, *SE,
                         /*StrictCheck=*/true);
-    // We failed to compare the pointers so just abandon this StoresVec.
-    if (!Diff)
-      return false;
-    StoreOffsetVec[Idx] = {StoresVec[Idx], *Diff};
+    StoreOffsetVec.emplace_back(*Diff, Idx);
   }
 
-  // Sort the vector based on the pointers. We create a copy because we may
-  // need the original later for calculating the reorder (shuffle) indices.
-  stable_sort(StoreOffsetVec, [](const std::pair<StoreInst *, int> &Pair1,
-                                 const std::pair<StoreInst *, int> &Pair2) {
-    int Offset1 = Pair1.second;
-    int Offset2 = Pair2.second;
-    return Offset1 < Offset2;
-  });
-
   // Check if the stores are consecutive by checking if their difference is 1.
-  for (unsigned Idx : seq<unsigned>(1, StoreOffsetVec.size()))
-    if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx - 1].second + 1)
+  if (StoreOffsetVec.size() != StoresVec.size())
+    return false;
+  sort(StoreOffsetVec,
+       [](const std::pair<int, unsigned> &L,
+          const std::pair<int, unsigned> &R) { return L.first < R.first; });
+  unsigned Idx = 0;
+  int PrevDist = 0;
+  for (const auto &P : StoreOffsetVec) {
+    if (Idx > 0 && P.first != PrevDist + 1)
       return false;
+    PrevDist = P.first;
+    ++Idx;
+  }
 
   // Calculate the shuffle indices according to their offset against the sorted
   // StoreOffsetVec.
-  ReorderIndices.reserve(StoresVec.size());
-  for (StoreInst *SI : StoresVec) {
-    unsigned Idx = find_if(StoreOffsetVec,
-                           [SI](const std::pair<StoreInst *, int> &Pair) {
-                             return Pair.first == SI;
-                           }) -
-                   StoreOffsetVec.begin();
-    ReorderIndices.push_back(Idx);
+  ReorderIndices.assign(StoresVec.size(), 0);
+  bool IsIdentity = true;
+  for (auto [I, P] : enumerate(StoreOffsetVec)) {
+    ReorderIndices[P.second] = I;
+    IsIdentity &= P.second == I;
   }
   // Identity order (e.g., {0,1,2,3}) is modeled as an empty OrdersType in
   // reorderTopToBottom() and reorderBottomToTop(), so we are following the
   // same convention here.
-  if (isIdentityOrder(ReorderIndices))
+  if (IsIdentity)
     ReorderIndices.clear();
 
   return true;
@@ -6508,8 +6506,7 @@ SmallVector<BoUpSLP::OrdersType, 1>
 BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
   unsigned NumLanes = TE->Scalars.size();
 
-  DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap =
-      collectUserStores(TE);
+  SmallVector<SmallVector<StoreInst *>> Stores = collectUserStores(TE);
 
   // Holds the reorder indices for each candidate store vector that is a user of
   // the current TreeEntry.
@@ -6518,8 +6515,7 @@ BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
   // Now inspect the stores collected per pointer and look for vectorization
   // candidates. For each candidate calculate the reorder index vector and push
   // it into `ExternalReorderIndices`
-  for (const auto &Pair : PtrToStoresMap) {
-    auto &StoresVec = Pair.second;
+  for (ArrayRef<StoreInst *> StoresVec : Stores) {
     // If we have fewer than NumLanes stores, then we can't form a vector.
     if (StoresVec.size() != NumLanes)
       continue;
@@ -6574,9 +6570,13 @@ static void gatherPossiblyVectorizableLoads(
       continue;
     bool IsFound = false;
     for (auto [Map, Data] : zip(ClusteredDistToLoad, ClusteredLoads)) {
-      if (LI->getParent() != Data.front().first->getParent() ||
-          LI->getType() != Data.front().first->getType())
-        continue;
+      assert(LI->getParent() == Data.front().first->getParent() &&
+             LI->getType() == Data.front().first->getType() &&
+             getUnderlyingObject(LI->getPointerOperand(), RecursionMaxDepth) ==
+                 getUnderlyingObject(Data.front().first->getPointerOperand(),
+                                     RecursionMaxDepth) &&
+             "Expected loads with the same type, same parent and same "
+             "underlying pointer.");
       std::optional<int> Dist = getPointersDiff(
           LI->getType(), LI->getPointerOperand(), Data.front().first->getType(),
           Data.front().first->getPointerOperand(), DL, SE,
@@ -6704,7 +6704,9 @@ static void gatherPossiblyVectorizableLoads(
 }
 
 void BoUpSLP::tryToVectorizeGatheredLoads(
-    ArrayRef<SmallVector<std::pair<LoadInst *, int>>> GatheredLoads) {
+    const SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
+                         SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
+                         8> &GatheredLoads) {
   GatheredLoadsEntriesFirst = VectorizableTree.size();
 
   SmallVector<SmallPtrSet<const Value *, 4>> LoadSetsToVectorize(
@@ -6737,7 +6739,10 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
     SmallVector<int> CandidateVFs;
     if (VectorizeNonPowerOf2 && has_single_bit(MaxVF + 1))
       CandidateVFs.push_back(MaxVF);
-    for (int NumElts = bit_floor(MaxVF); NumElts > 1; NumElts /= 2) {
+    for (int NumElts = getFloorFullVectorNumberOfElements(
+             *TTI, Loads.front()->getType(), MaxVF);
+         NumElts > 1; NumElts = getFloorFullVectorNumberOfElements(
+                          *TTI, Loads.front()->getType(), NumElts - 1)) {
       CandidateVFs.push_back(NumElts);
       if (VectorizeNonPowerOf2 && NumElts > 2)
         CandidateVFs.push_back(NumElts - 1);
@@ -6751,9 +6756,10 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
       if (Final && NumElts > BestVF)
         continue;
       SmallVector<unsigned> MaskedGatherVectorized;
-      for (unsigned Cnt = StartIdx, E = Loads.size(); Cnt + NumElts <= E;
+      for (unsigned Cnt = StartIdx, E = Loads.size(); Cnt < E;
            ++Cnt) {
-        ArrayRef<LoadInst *> Slice = ArrayRef(Loads).slice(Cnt, NumElts);
+        ArrayRef<LoadInst *> Slice =
+            ArrayRef(Loads).slice(Cnt, std::min(NumElts, E - Cnt));
         if (VectorizedLoads.count(Slice.front()) ||
             VectorizedLoads.count(Slice.back()) ||
             areKnownNonVectorizableLoads(Slice))
@@ -7099,24 +7105,27 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
         }
         return NonVectorized;
       };
-  SmallVector<LoadInst *> NonVectorized = ProcessGatheredLoads(GatheredLoads);
-  if (!GatheredLoads.empty() && !NonVectorized.empty() &&
-      std::accumulate(
-          GatheredLoads.begin(), GatheredLoads.end(), 0u,
-          [](unsigned S, ArrayRef<std::pair<LoadInst *, int>> LoadsDists) {
-            return S + LoadsDists.size();
-          }) != NonVectorized.size() &&
-      IsMaskedGatherSupported(NonVectorized)) {
-    SmallVector<SmallVector<std::pair<LoadInst *, int>>> FinalGatheredLoads;
-    for (LoadInst *LI : NonVectorized) {
-      // Reinsert non-vectorized loads to other list of loads with the same
-      // base pointers.
-      gatherPossiblyVectorizableLoads(*this, LI, *DL, *SE, *TTI,
-                                      FinalGatheredLoads,
-                                      /*AddNew=*/false);
-    }
-    // Final attempt to vectorize non-vectorized loads.
-    (void)ProcessGatheredLoads(FinalGatheredLoads, /*Final=*/true);
+  for (const auto &GLs : GatheredLoads) {
+    const auto &Ref = GLs.second;
+    SmallVector<LoadInst *> NonVectorized = ProcessGatheredLoads(Ref);
+    if (!Ref.empty() && !NonVectorized.empty() &&
+        std::accumulate(
+            Ref.begin(), Ref.end(), 0u,
+            [](unsigned S, ArrayRef<std::pair<LoadInst *, int>> LoadsDists) {
+              return S + LoadsDists.size();
+            }) != NonVectorized.size() &&
+        IsMaskedGatherSupported(NonVectorized)) {
+      SmallVector<SmallVector<std::pair<LoadInst *, int>>> FinalGatheredLoads;
+      for (LoadInst *LI : NonVectorized) {
+        // Reinsert non-vectorized loads to other list of loads with the same
+        // base pointers.
+        gatherPossiblyVectorizableLoads(*this, LI, *DL, *SE, *TTI,
+                                        FinalGatheredLoads,
+                                        /*AddNew=*/false);
+      }
+      // Final attempt to vectorize non-vectorized loads.
+      (void)ProcessGatheredLoads(FinalGatheredLoads, /*Final=*/true);
+    }
   }
   // Try to vectorize postponed load entries, previously marked as gathered.
   for (unsigned Idx : LoadEntriesToVectorize) {
@@ -7363,13 +7372,6 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) {
   assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
 
-  if (S.MainOp->getType()->isFloatingPointTy() &&
-      TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
-        auto *I = dyn_cast<Instruction>(V);
-        return I && (I->isBinaryOp() || isa<CallInst>(I)) && !I->isFast();
-      }))
-    return TreeEntry::NeedToGather;
-
   unsigned ShuffleOrOp =
       S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
   auto *VL0 = cast<Instruction>(S.OpValue);
@@ -7534,6 +7536,12 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::Or:
   case Instruction::Xor:
   case Instruction::Freeze:
+    if (S.MainOp->getType()->isFloatingPointTy() &&
+        TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
+          auto *I = dyn_cast<Instruction>(V);
+          return I && I->isBinaryOp() && !I->isFast();
+        }))
+      return TreeEntry::NeedToGather;
     return TreeEntry::Vectorize;
   case Instruction::GetElementPtr: {
     // We don't combine GEPs with complicated (nested) indexing.
@@ -7625,6 +7633,12 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     return TreeEntry::NeedToGather;
   }
   case Instruction::Call: {
+    if (S.MainOp->getType()->isFloatingPointTy() &&
+        TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
+          auto *I = dyn_cast<Instruction>(V);
+          return I && !I->isFast();
+        }))
+      return TreeEntry::NeedToGather;
     // Check if the calls are all to the same vectorizable intrinsic or
     // library function.
     CallInst *CI = cast<CallInst>(VL0);
@@ -9344,8 +9358,13 @@ void BoUpSLP::transformNodes() {
       // insertvector instructions.
       unsigned StartIdx = 0;
       unsigned End = VL.size();
-      for (unsigned VF = VL.size() / 2; VF >= MinVF; VF = bit_ceil(VF) / 2) {
-        SmallVector<unsigned> Slices;
+      for (unsigned VF = getFloorFullVectorNumberOfElements(
+               *TTI, VL.front()->getType(), VL.size() - 1);
+           VF >= MinVF; VF = getFloorFullVectorNumberOfElements(
+                            *TTI, VL.front()->getType(), VF - 1)) {
+        if (StartIdx + VF > End)
+          continue;
+        SmallVector<std::pair<unsigned, unsigned>> Slices;
         for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) {
           ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
           // If any instruction is vectorized already - do not try again.
@@ -9375,7 +9394,10 @@ void BoUpSLP::transformNodes() {
             if (IsSplat)
               continue;
             InstructionsState S = getSameOpcode(Slice, *TLI);
-            if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice))
+            if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice) ||
+                (S.getOpcode() == Instruction::Load &&
+                 areKnownNonVectorizableLoads(Slice)) ||
+                (S.getOpcode() != Instruction::Load && !has_single_bit(VF)))
               continue;
             if (VF == 2) {
               // Try to vectorize reduced values or if all users are vectorized.
@@ -9395,8 +9417,16 @@ void BoUpSLP::transformNodes() {
                     canVectorizeLoads(Slice, Slice.front(), Order, PointerOps);
                 // Do not vectorize gathers.
                 if (Res == LoadsState::ScatterVectorize ||
-                    Res == LoadsState::Gather)
+                    Res == LoadsState::Gather) {
+                  if (Res == LoadsState::Gather) {
+                    registerNonVectorizableLoads(Slice);
+                    // If reductions and the scalars from the root node are
+                    // analyzed - mark as non-vectorizable reduction.
+                    if (UserIgnoreList && E.Idx == 0)
+                      analyzedReductionVals(Slice);
+                  }
                   continue;
+                }
               } else if (S.getOpcode() == Instruction::ExtractElement ||
                          (TTI->getInstructionCost(
                               cast<Instruction>(Slice.front()), CostKind) <
@@ -9411,17 +9441,17 @@ void BoUpSLP::transformNodes() {
               }
             }
           }
-          Slices.emplace_back(Cnt);
+          Slices.emplace_back(Cnt, Slice.size());
         }
-        auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt) {
+        auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) {
           E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt);
           if (StartIdx == Cnt)
-            StartIdx = Cnt + VF;
-          if (End == Cnt + VF)
+            StartIdx = Cnt + Sz;
+          if (End == Cnt + Sz)
             End = Cnt;
         };
-        for (unsigned Cnt : Slices) {
-          ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+        for (auto [Cnt, Sz] : Slices) {
+          ArrayRef<Value *> Slice = VL.slice(Cnt, Sz);
           // If any instruction is vectorized already - do not try again.
           if (TreeEntry *SE = getTreeEntry(Slice.front());
               SE || getTreeEntry(Slice.back())) {
@@ -9430,7 +9460,7 @@ void BoUpSLP::transformNodes() {
             if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
               continue;
             SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
-            AddCombinedNode(SE->Idx, Cnt);
+            AddCombinedNode(SE->Idx, Cnt, Sz);
             continue;
           }
           unsigned PrevSize = VectorizableTree.size();
@@ -9442,12 +9472,14 @@ void BoUpSLP::transformNodes() {
               VectorizableTree[PrevSize]->getOpcode() !=
                   Instruction::ExtractElement &&
               !isSplat(Slice)) {
+            if (UserIgnoreList && E.Idx == 0 && VF == 2)
+              analyzedReductionVals(Slice);
             VectorizableTree.pop_back();
             assert(PrevEntriesSize == LoadEntriesToVectorize.size() &&
                    "LoadEntriesToVectorize expected to remain the same");
             continue;
           }
-          AddCombinedNode(PrevSize, Cnt);
+          AddCombinedNode(PrevSize, Cnt, Sz);
         }
       }
     }
@@ -9542,11 +9574,24 @@ void BoUpSLP::transformNodes() {
          VectorizableTree.front()->Scalars.size() == SmallVF) ||
         (VectorizableTree.size() <= 2 && UserIgnoreList))
       return;
+
+    if (VectorizableTree.front()->isNonPowOf2Vec() &&
+        getCanonicalGraphSize() != getTreeSize() && UserIgnoreList &&
+        getCanonicalGraphSize() <= SmallTree &&
+        count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()),
+                 [](const std::unique_ptr<TreeEntry> &TE) {
+                   return TE->isGather() &&
+                          TE->getOpcode() == Instruction::Load &&
+                          !allSameBlock(TE->Scalars);
+                 }) == 1)
+      return;
   }
 
   // A list of loads to be gathered during the vectorization process. We can
   // try to vectorize them at the end, if profitable.
-  SmallVector<SmallVector<std::pair<LoadInst *, int>>> GatheredLoads;
+  SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
+                 SmallVector<SmallVector<std::pair<LoadInst *, int>>>, 8>
+      GatheredLoads;
 
   for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
     TreeEntry &E = *TE;
@@ -9558,9 +9603,21 @@ void BoUpSLP::transformNodes() {
                                             !isVectorized(V) &&
                                             !isDeleted(cast<Instruction>(V));
                                    }))) &&
-        !isSplat(E.Scalars))
-      gatherPossiblyVectorizableLoads(*this, E.Scalars, *DL, *SE, *TTI,
-                                      GatheredLoads);
+        !isSplat(E.Scalars)) {
+      for (Value *V : E.Scalars) {
+        auto *LI = dyn_cast<LoadInst>(V);
+        if (!LI)
+          continue;
+        if (isDeleted(LI) || isVectorized(LI) || !LI->isSimple())
+          continue;
+        gatherPossiblyVectorizableLoads(
+            *this, V, *DL, *SE, *TTI,
+            GatheredLoads[std::make_tuple(
+                LI->getParent(),
+                getUnderlyingObject(LI->getPointerOperand(), RecursionMaxDepth),
+                LI->getType())]);
+      }
+    }
   }
   // Try to vectorize gathered loads if this is not just a gather of loads.
   if (!GatheredLoads.empty())
@@ -11515,6 +11572,34 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
   return true;
 }
 
+bool BoUpSLP::isTreeNotExtendable() const {
+  if (getCanonicalGraphSize() != getTreeSize()) {
+    constexpr unsigned SmallTree = 3;
+    if (VectorizableTree.front()->isNonPowOf2Vec() &&
+        getCanonicalGraphSize() <= SmallTree &&
+        count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()),
+                 [](const std::unique_ptr<TreeEntry> &TE) {
+                   return TE->isGather() &&
+                          TE->getOpcode() == Instruction::Load &&
+                          !allSameBlock(TE->Scalars);
+                 }) == 1)
+      return true;
+    return false;
+  }
+  bool Res = false;
+  for (unsigned Idx : seq<unsigned>(getTreeSize())) {
+    TreeEntry &E = *VectorizableTree[Idx];
+    if (!E.isGather())
+      continue;
+    if (E.getOpcode() && E.getOpcode() != Instruction::Load)
+      return false;
+    if (isSplat(E.Scalars) || allConstant(E.Scalars))
+      continue;
+    Res = true;
+  }
+  return Res;
+}
+
 InstructionCost BoUpSLP::getSpillCost() const {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live. When we see a call instruction that is not part of our tree,
@@ -14868,6 +14953,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         return E->VectorizedValue;
       }
 
+      if (Op->getType() != VecTy) {
+        assert((It != MinBWs.end() || getOperandEntry(E, 0)->isGather() ||
+                MinBWs.contains(getOperandEntry(E, 0))) &&
+               "Expected item in MinBWs.");
+        Op = Builder.CreateIntCast(Op, VecTy, GetOperandSignedness(0));
+      }
       Value *V = Builder.CreateFreeze(Op);
       V = FinalShuffle(V, E);
 
@@ -17010,6 +17101,8 @@ bool BoUpSLP::collectValuesToDemote(
     return TryProcessInstruction(
         BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)});
   }
+  case Instruction::Freeze:
+    return TryProcessInstruction(BitWidth, getOperandEntry(&E, 0));
   case Instruction::Shl: {
     // If we are truncating the result of this SHL, and if it's a shift of an
     // inrange amount, we can always perform a SHL in a smaller type.
@@ -17131,9 +17224,25 @@ bool BoUpSLP::collectValuesToDemote(
                 MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
       });
     };
+    auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
+      assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
+      return all_of(E.Scalars, [&](Value *V) {
+        auto *I = cast<Instruction>(V);
+        unsigned SignBits = OrigBitWidth - BitWidth;
+        APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
+        unsigned Op0SignBits =
+            ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT);
+        return SignBits <= Op0SignBits &&
+               ((SignBits != Op0SignBits &&
+                 !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
+                MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)));
+      });
+    };
     if (ID != Intrinsic::abs) {
       Operands.push_back(getOperandEntry(&E, 1));
       CallChecker = CompChecker;
+    } else {
+      CallChecker = AbsChecker;
     }
     InstructionCost BestCost =
         std::numeric_limits<InstructionCost::CostType>::max();
@@ -18747,7 +18856,8 @@ public:
 
     auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) {
       Key = hash_combine(hash_value(LI->getParent()), Key);
-      Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
+      Value *Ptr =
+          getUnderlyingObject(LI->getPointerOperand(), RecursionMaxDepth);
       if (!LoadKeyUsed.insert(Key).second) {
         auto LIt = LoadsMap.find(std::make_pair(Key, Ptr));
         if (LIt != LoadsMap.end()) {
@@ -19070,8 +19180,28 @@ public:
           RegMaxNumber * RedValsMaxNumber);
 
       unsigned ReduxWidth = NumReducedVals;
+      auto GetVectorFactor = [&, &TTI = *TTI](unsigned ReduxWidth) {
+        unsigned NumParts, NumRegs;
+        Type *ScalarTy = Candidates.front()->getType();
+        ReduxWidth =
+            getFloorFullVectorNumberOfElements(TTI, ScalarTy, ReduxWidth);
+        VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
+        NumParts = TTI.getNumberOfParts(Tp);
+        NumRegs =
+            TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
+        while (NumParts > NumRegs) {
+          ReduxWidth = bit_floor(ReduxWidth - 1);
+          VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
+          NumParts = TTI.getNumberOfParts(Tp);
+          NumRegs =
+              TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
+        }
+        if (NumParts > NumRegs / 2)
+          ReduxWidth = bit_floor(ReduxWidth);
+        return ReduxWidth;
+      };
       if (!VectorizeNonPowerOf2 || !has_single_bit(ReduxWidth + 1))
-        ReduxWidth = bit_floor(ReduxWidth);
+        ReduxWidth = GetVectorFactor(ReduxWidth);
       ReduxWidth = std::min(ReduxWidth, MaxElts);
 
       unsigned Start = 0;
@@ -19079,10 +19209,7 @@ public:
       // Restarts vectorization attempt with lower vector factor.
       unsigned PrevReduxWidth = ReduxWidth;
       bool CheckForReusedReductionOpsLocal = false;
-      auto &&AdjustReducedVals = [&Pos, &Start, &ReduxWidth, NumReducedVals,
-                                  &CheckForReusedReductionOpsLocal,
-                                  &PrevReduxWidth, &V,
-                                  &IgnoreList](bool IgnoreVL = false) {
+      auto AdjustReducedVals = [&](bool IgnoreVL = false) {
         bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList);
         if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
           // Check if any of the reduction ops are gathered. If so, worth
@@ -19093,10 +19220,13 @@ public:
         if (Pos < NumReducedVals - ReduxWidth + 1)
           return IsAnyRedOpGathered;
         Pos = Start;
-        ReduxWidth = bit_ceil(ReduxWidth) / 2;
+        --ReduxWidth;
+        if (ReduxWidth > 1)
+          ReduxWidth = GetVectorFactor(ReduxWidth);
         return IsAnyRedOpGathered;
       };
       bool AnyVectorized = false;
+      SmallDenseSet<std::pair<unsigned, unsigned>, 8> IgnoredCandidates;
       while (Pos < NumReducedVals - ReduxWidth + 1 &&
              ReduxWidth >= ReductionLimit) {
         // Dependency in tree of the reduction ops - drop this attempt, try
@@ -19108,8 +19238,15 @@ public:
         }
         PrevReduxWidth = ReduxWidth;
         ArrayRef<Value *> VL(std::next(Candidates.begin(), Pos), ReduxWidth);
-        // Beeing analyzed already - skip.
-        if (V.areAnalyzedReductionVals(VL)) {
+        // Been analyzed already - skip.
+        if (IgnoredCandidates.contains(std::make_pair(Pos, ReduxWidth)) ||
+            (!has_single_bit(ReduxWidth) &&
+             (IgnoredCandidates.contains(
+                  std::make_pair(Pos, bit_floor(ReduxWidth))) ||
+              IgnoredCandidates.contains(
+                  std::make_pair(Pos + (ReduxWidth - bit_floor(ReduxWidth)),
+                                 bit_floor(ReduxWidth))))) ||
+            V.areAnalyzedReductionVals(VL)) {
           (void)AdjustReducedVals(/*IgnoreVL=*/true);
           continue;
         }
@@ -19215,8 +19352,24 @@ public:
                    << " and threshold "
                    << ore::NV("Threshold", -SLPCostThreshold);
           });
-          if (!AdjustReducedVals())
+          if (!AdjustReducedVals()) {
             V.analyzedReductionVals(VL);
+            unsigned Offset = Pos == Start ? Pos : Pos - 1;
+            if (ReduxWidth > ReductionLimit && V.isTreeNotExtendable()) {
+              // Add subvectors of VL to the list of the analyzed values.
+              for (unsigned VF = getFloorFullVectorNumberOfElements(
+                       *TTI, VL.front()->getType(), ReduxWidth - 1);
+                   VF >= ReductionLimit;
+                   VF = getFloorFullVectorNumberOfElements(
+                       *TTI, VL.front()->getType(), VF - 1)) {
+                if (has_single_bit(VF) &&
+                    V.getCanonicalGraphSize() != V.getTreeSize())
+                  continue;
+                for (unsigned Idx : seq<unsigned>(ReduxWidth - VF))
+                  IgnoredCandidates.insert(std::make_pair(Offset + Idx, VF));
+              }
+            }
+          }
           continue;
         }
 
@@ -19325,7 +19478,9 @@ public:
         }
         Pos += ReduxWidth;
         Start = Pos;
-        ReduxWidth = llvm::bit_floor(NumReducedVals - Pos);
+        ReduxWidth = NumReducedVals - Pos;
+        if (ReduxWidth > 1)
+          ReduxWidth = GetVectorFactor(NumReducedVals - Pos);
         AnyVectorized = true;
       }
       if (OptReusedScalars && !AnyVectorized) {
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
index 9bbeca4fc154..6217c9fecf45 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
@@ -60,7 +60,7 @@ bool PredIterator::operator==(const PredIterator &Other) const {
 
 #ifndef NDEBUG
 void DGNode::print(raw_ostream &OS, bool PrintDeps) const {
-  OS << *I << " USuccs:" << UnscheduledSuccs << "\n";
+  OS << *I << " USuccs:" << UnscheduledSuccs << " Sched:" << Scheduled << "\n";
 }
 void DGNode::dump() const { print(dbgs()); }
 void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {
@@ -249,6 +249,10 @@ void DependencyGraph::setDefUseUnscheduledSuccs(
   // Walk over all instructions in "BotInterval" and update the counter
   // of operands that are in "TopInterval".
   for (Instruction &BotI : BotInterval) {
+    auto *BotN = getNode(&BotI);
+    // Skip scheduled nodes.
+    if (BotN->scheduled())
+      continue;
     for (Value *Op : BotI.operands()) {
       auto *OpI = dyn_cast<Instruction>(Op);
       if (OpI == nullptr)
@@ -286,7 +290,9 @@ void DependencyGraph::createNewNodes(const Interval<Instruction> &NewInterval) {
         MemDGNodeIntervalBuilder::getBotMemDGNode(TopInterval, *this);
     MemDGNode *LinkBotN =
         MemDGNodeIntervalBuilder::getTopMemDGNode(BotInterval, *this);
-    assert(LinkTopN->comesBefore(LinkBotN) && "Wrong order!");
+    assert((LinkTopN == nullptr || LinkBotN == nullptr ||
+            LinkTopN->comesBefore(LinkBotN)) &&
+           "Wrong order!");
     if (LinkTopN != nullptr && LinkBotN != nullptr) {
       LinkTopN->setNextNode(LinkBotN);
       LinkBotN->setPrevNode(LinkTopN);
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
new file mode 100644
index 000000000000..6140c2a8dcec
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
@@ -0,0 +1,169 @@
+//===- Scheduler.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
+
+namespace llvm::sandboxir {
+
+// TODO: Check if we can cache top/bottom to reduce compile-time.
+DGNode *SchedBundle::getTop() const {
+  DGNode *TopN = Nodes.front();
+  for (auto *N : drop_begin(Nodes)) {
+    if (N->getInstruction()->comesBefore(TopN->getInstruction()))
+      TopN = N;
+  }
+  return TopN;
+}
+
+DGNode *SchedBundle::getBot() const {
+  DGNode *BotN = Nodes.front();
+  for (auto *N : drop_begin(Nodes)) {
+    if (BotN->getInstruction()->comesBefore(N->getInstruction()))
+      BotN = N;
+  }
+  return BotN;
+}
+
+void SchedBundle::cluster(BasicBlock::iterator Where) {
+  for (auto *N : Nodes) {
+    auto *I = N->getInstruction();
+    if (I->getIterator() == Where)
+      ++Where; // Try to maintain bundle order.
+    I->moveBefore(*Where.getNodeParent(), Where);
+  }
+}
+
+#ifndef NDEBUG
+void SchedBundle::dump(raw_ostream &OS) const {
+  for (auto *N : Nodes)
+    OS << *N;
+}
+
+void SchedBundle::dump() const {
+  dump(dbgs());
+  dbgs() << "\n";
+}
+#endif // NDEBUG
+
+#ifndef NDEBUG
+void ReadyListContainer::dump(raw_ostream &OS) const {
+  auto ListCopy = List;
+  while (!ListCopy.empty()) {
+    OS << *ListCopy.top() << "\n";
+    ListCopy.pop();
+  }
+}
+
+void ReadyListContainer::dump() const {
+  dump(dbgs());
+  dbgs() << "\n";
+}
+#endif // NDEBUG
+
+void Scheduler::scheduleAndUpdateReadyList(SchedBundle &Bndl) {
+  // Find where we should schedule the instructions.
+  assert(ScheduleTopItOpt && "Should have been set by now!");
+  auto Where = *ScheduleTopItOpt;
+  // Move all instructions in `Bndl` to `Where`.
+  Bndl.cluster(Where);
+  // Update the last scheduled bundle.
+  ScheduleTopItOpt = Bndl.getTop()->getInstruction()->getIterator();
+  // Set nodes as "scheduled" and decrement the UnsceduledSuccs counter of all
+  // dependency predecessors.
+  for (DGNode *N : Bndl) {
+    N->setScheduled(true);
+    for (auto *DepN : N->preds(DAG)) {
+      // TODO: preds() should not return nullptr.
+      if (DepN == nullptr)
+        continue;
+      DepN->decrUnscheduledSuccs();
+      if (DepN->ready())
+        ReadyList.insert(DepN);
+    }
+  }
+}
+
+SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) {
+  SchedBundle::ContainerTy Nodes;
+  Nodes.reserve(Instrs.size());
+  for (auto *I : Instrs)
+    Nodes.push_back(DAG.getNode(I));
+  auto BndlPtr = std::make_unique<SchedBundle>(std::move(Nodes));
+  auto *Bndl = BndlPtr.get();
+  Bndls.push_back(std::move(BndlPtr));
+  return Bndl;
+}
+
+bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) {
+  // Use a set of instructions, instead of `Instrs` for fast lookups.
+  DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end());
+  // This collects the nodes that correspond to instructions found in `Instrs`
+  // that have just become ready. These nodes won't be scheduled right away.
+  SmallVector<DGNode *, 8> DeferredNodes;
+
+  // Keep scheduling ready nodes until we either run out of ready nodes (i.e.,
+  // ReadyList is empty), or all nodes that correspond to `Instrs` (the nodes of
+  // which are collected in DeferredNodes) are all ready to schedule.
+  while (!ReadyList.empty()) {
+    auto *ReadyN = ReadyList.pop();
+    if (InstrsToDefer.contains(ReadyN->getInstruction())) {
+      // If the ready instruction is one of those in `Instrs`, then we don't
+      // schedule it right away. Instead we defer it until we can schedule it
+      // along with the rest of the instructions in `Instrs`, at the same
+      // time in a single scheduling bundle.
+      DeferredNodes.push_back(ReadyN);
+      bool ReadyToScheduleDeferred = DeferredNodes.size() == Instrs.size();
+      if (ReadyToScheduleDeferred) {
+        scheduleAndUpdateReadyList(*createBundle(Instrs));
+        return true;
+      }
+    } else {
+      // If the ready instruction is not found in `Instrs`, then we wrap it in a
+      // scheduling bundle and schedule it right away.
+      scheduleAndUpdateReadyList(*createBundle({ReadyN->getInstruction()}));
+    }
+  }
+  assert(DeferredNodes.size() != Instrs.size() &&
+         "We should have succesfully scheduled and early-returned!");
+  return false;
+}
+
+bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
+  assert(all_of(drop_begin(Instrs),
+                [Instrs](Instruction *I) {
+                  return I->getParent() == (*Instrs.begin())->getParent();
+                }) &&
+         "Instrs not in the same BB!");
+  // Extend the DAG to include Instrs.
+  Interval<Instruction> Extension = DAG.extend(Instrs);
+  // TODO: Set the window of the DAG that we are interested in.
+  // We start scheduling at the bottom instr of Instrs.
+  auto getBottomI = [](ArrayRef<Instruction *> Instrs) -> Instruction * {
+    return *min_element(Instrs,
+                        [](auto *I1, auto *I2) { return I1->comesBefore(I2); });
+  };
+  ScheduleTopItOpt = std::next(getBottomI(Instrs)->getIterator());
+  // Add nodes to ready list.
+  for (auto &I : Extension) {
+    auto *N = DAG.getNode(&I);
+    if (N->ready())
+      ReadyList.insert(N);
+  }
+  // Try schedule all nodes until we can schedule Instrs back-to-back.
+  return tryScheduleUntil(Instrs);
+}
+
+#ifndef NDEBUG
+void Scheduler::dump(raw_ostream &OS) const {
+  OS << "ReadyList:\n";
+  ReadyList.dump(OS);
+}
+void Scheduler::dump() const { dump(dbgs()); }
+#endif // NDEBUG
+
+} // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a38cdfc542cb..f4a1f58debba 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -392,6 +392,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
     return true;
   switch (Opcode) {
   case Instruction::ICmp:
+  case Instruction::Select:
   case VPInstruction::BranchOnCond:
   case VPInstruction::BranchOnCount:
   case VPInstruction::CalculateTripCountMinusVF:
@@ -440,9 +441,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
     return Builder.CreateCmp(getPredicate(), A, B, Name);
   }
   case Instruction::Select: {
-    Value *Cond = State.get(getOperand(0));
-    Value *Op1 = State.get(getOperand(1));
-    Value *Op2 = State.get(getOperand(2));
+    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
+    Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
+    Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
+    Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
     return Builder.CreateSelect(Cond, Op1, Op2, Name);
   }
   case VPInstruction::ActiveLaneMask: {
@@ -742,6 +744,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   default:
     return false;
   case Instruction::ICmp:
+  case Instruction::Select:
   case VPInstruction::PtrAdd:
     // TODO: Cover additional opcodes.
     return vputils::onlyFirstLaneUsed(this);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index faec08cac187..d50f3c0c3f3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1439,7 +1439,24 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
 /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
 /// ...
 ///
-bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
+/// If MaxSafeElements is provided, the function adds the following recipes:
+/// vector.ph:
+/// ...
+///
+/// vector.body:
+/// ...
+/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
+///                                               [ %NextEVLIV, %vector.body ]
+/// %AVL = sub original TC, %EVLPhi
+/// %cmp = cmp ult %AVL, MaxSafeElements
+/// %SAFE_AVL = select %cmp, %AVL, MaxSafeElements
+/// %VPEVL = EXPLICIT-VECTOR-LENGTH %SAFE_AVL
+/// ...
+/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
+/// ...
+///
+bool VPlanTransforms::tryAddExplicitVectorLength(
+    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   // The transform updates all users of inductions to work based on EVL, instead
   // of the VF directly. At the moment, widened inductions cannot be updated, so
@@ -1464,14 +1481,19 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
   // Create the ExplicitVectorLengthPhi recipe in the main loop.
   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
   EVLPhi->insertAfter(CanonicalIVPHI);
-  // TODO: Add support for MaxSafeDist for correct loop emission.
+  VPBuilder Builder(Header, Header->getFirstNonPhi());
   // Compute original TC - IV as the AVL (application vector length).
-  auto *AVL = new VPInstruction(Instruction::Sub, {Plan.getTripCount(), EVLPhi},
-                                DebugLoc(), "avl");
-  AVL->insertBefore(*Header, Header->getFirstNonPhi());
-  auto *VPEVL =
-      new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc());
-  VPEVL->insertAfter(AVL);
+  VPValue *AVL = Builder.createNaryOp(
+      Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
+  if (MaxSafeElements) {
+    // Support for MaxSafeDist for correct loop emission.
+    VPValue *AVLSafe = Plan.getOrAddLiveIn(
+        ConstantInt::get(CanonicalIVPHI->getScalarType(), *MaxSafeElements));
+    VPValue *Cmp = Builder.createICmp(ICmpInst::ICMP_ULT, AVL, AVLSafe);
+    AVL = Builder.createSelect(Cmp, AVL, AVLSafe, DebugLoc(), "safe_avl");
+  }
+  auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
+                                     DebugLoc());
 
   auto *CanonicalIVIncrement =
       cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3b792ee32dce..60a44bfb0dca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -108,7 +108,9 @@ struct VPlanTransforms {
   /// VPCanonicalIVPHIRecipe is only used to control the loop after
   /// this transformation.
   /// \returns true if the transformation succeeds, or false if it doesn't.
-  static bool tryAddExplicitVectorLength(VPlan &Plan);
+  static bool
+  tryAddExplicitVectorLength(VPlan &Plan,
+                             const std::optional<unsigned> &MaxEVLSafeElements);
 
   // For each Interleave Group in \p InterleaveGroups replace the Recipes
   // widening its memory instructions with a single VPInterleaveRecipe at its
diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll
index b2c650d11626..9d86f87f3ed5 100644
--- a/llvm/test/CodeGen/DirectX/atan2.ll
+++ b/llvm/test/CodeGen/DirectX/atan2.ll
@@ -1,87 +1,87 @@
-; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
-; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
-
-; Make sure correct dxil expansions for atan2 are generated for float and half.
-
-define noundef float @atan2_float(float noundef %y, float noundef %x) {
-entry:
-; CHECK: [[DIV:%.+]] = fdiv float %y, %x
-; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]])
-; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]])
-; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000
-; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000
-; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00
-; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 
-; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 
-; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00
-; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
-; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]]
-; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
-; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]]
-; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
-; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]]
-; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
-; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]]
-; CHECK: ret float [[SELECT_HPI]]
-  %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x)
-  ret float %elt.atan2
-}
-
-define noundef half @atan2_half(half noundef %y, half noundef %x) {
-entry:
-; CHECK: [[DIV:%.+]] = fdiv half %y, %x
-; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]])
-; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]])
-; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248
-; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248
-; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000
-; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 
-; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 
-; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000
-; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
-; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]]
-; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
-; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]]
-; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
-; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]]
-; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
-; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]]
-; CHECK: ret half [[SELECT_HPI]]
-  %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x)
-  ret half %elt.atan2
-}
-
-define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) {
-entry:
-; Just Expansion, no scalarization or lowering:
-; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x
-; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]])
-; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
-; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
-; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer
-; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer
-; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer
-; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer
-; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]]
-; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]]
-; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]]
-; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]]
-; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]]
-; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]]
-; EXPCHECK: ret <4 x float> [[SELECT_HPI]]
-
-; Scalarization occurs after expansion, so atan scalarization is tested separately.
-; Expansion, scalarization and lowering:
-; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls.
-; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
-; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
-
-  %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
-  ret <4 x float> %elt.atan2
-}
-
-declare half @llvm.atan2.f16(half, half)
-declare float @llvm.atan2.f32(float, float)
-declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+; Make sure correct dxil expansions for atan2 are generated for float and half.
+
+define noundef float @atan2_float(float noundef %y, float noundef %x) {
+entry:
+; CHECK: [[DIV:%.+]] = fdiv float %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]])
+; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]])
+; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000
+; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000
+; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00
+; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 
+; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 
+; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00
+; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
+; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]]
+; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
+; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]]
+; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
+; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]]
+; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
+; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]]
+; CHECK: ret float [[SELECT_HPI]]
+  %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x)
+  ret float %elt.atan2
+}
+
+define noundef half @atan2_half(half noundef %y, half noundef %x) {
+entry:
+; CHECK: [[DIV:%.+]] = fdiv half %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]])
+; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]])
+; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248
+; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248
+; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000
+; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 
+; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 
+; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000
+; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
+; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]]
+; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
+; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]]
+; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
+; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]]
+; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
+; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]]
+; CHECK: ret half [[SELECT_HPI]]
+  %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x)
+  ret half %elt.atan2
+}
+
+define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) {
+entry:
+; Just Expansion, no scalarization or lowering:
+; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x
+; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]])
+; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
+; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
+; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer
+; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer
+; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <4 x float> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
+
+  %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
+  ret <4 x float> %elt.atan2
+}
+
+declare half @llvm.atan2.f16(half, half)
+declare float @llvm.atan2.f32(float, float)
+declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll
index 9b66f9f1dd45..372934098b7c 100644
--- a/llvm/test/CodeGen/DirectX/atan2_error.ll
+++ b/llvm/test/CodeGen/DirectX/atan2_error.ll
@@ -1,11 +1,11 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation atan does not support double overload type
-; CHECK: in function atan2_double
-; CHECK-SAME: Cannot create ATan operation: Invalid overload type
-
-define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
-entry:
-  %1 = call double @llvm.atan2.f64(double %a, double %b)
-  ret double %1
-}
+; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation atan does not support double overload type
+; CHECK: in function atan2_double
+; CHECK-SAME: Cannot create ATan operation: Invalid overload type
+
+define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
+entry:
+  %1 = call double @llvm.atan2.f64(double %a, double %b)
+  ret double %1
+}
diff --git a/llvm/test/CodeGen/DirectX/cross.ll b/llvm/test/CodeGen/DirectX/cross.ll
index 6153cf7cddc9..6ec3ec4d3594 100644
--- a/llvm/test/CodeGen/DirectX/cross.ll
+++ b/llvm/test/CodeGen/DirectX/cross.ll
@@ -1,56 +1,56 @@
-; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
-
-; Make sure dxil operation function calls for cross are generated for half/float.
-
-declare <3 x half> @llvm.dx.cross.v3f16(<3 x half>, <3 x half>)
-declare <3 x float> @llvm.dx.cross.v3f32(<3 x float>, <3 x float>)
-
-define noundef <3 x half> @test_cross_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
-entry:
-  ; CHECK: %x0 = extractelement <3 x half> %p0, i64 0
-  ; CHECK: %x1 = extractelement <3 x half> %p0, i64 1
-  ; CHECK: %x2 = extractelement <3 x half> %p0, i64 2
-  ; CHECK: %y0 = extractelement <3 x half> %p1, i64 0
-  ; CHECK: %y1 = extractelement <3 x half> %p1, i64 1
-  ; CHECK: %y2 = extractelement <3 x half> %p1, i64 2
-  ; CHECK: %0 = fmul half %x1, %y2
-  ; CHECK: %1 = fmul half %x2, %y1
-  ; CHECK: %hlsl.cross1 = fsub half %0, %1
-  ; CHECK: %2 = fmul half %x2, %y0
-  ; CHECK: %3 = fmul half %x0, %y2
-  ; CHECK: %hlsl.cross2 = fsub half %2, %3
-  ; CHECK: %4 = fmul half %x0, %y1
-  ; CHECK: %5 = fmul half %x1, %y0
-  ; CHECK: %hlsl.cross3 = fsub half %4, %5
-  ; CHECK: %6 = insertelement <3 x half> undef, half %hlsl.cross1, i64 0
-  ; CHECK: %7 = insertelement <3 x half> %6, half %hlsl.cross2, i64 1
-  ; CHECK: %8 = insertelement <3 x half> %7, half %hlsl.cross3, i64 2
-  ; CHECK: ret <3 x half> %8
-  %hlsl.cross = call <3 x half> @llvm.dx.cross.v3f16(<3 x half> %p0, <3 x half> %p1)
-  ret <3 x half> %hlsl.cross
-}
-
-define noundef <3 x float> @test_cross_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
-entry:
-  ; CHECK: %x0 = extractelement <3 x float> %p0, i64 0
-  ; CHECK: %x1 = extractelement <3 x float> %p0, i64 1
-  ; CHECK: %x2 = extractelement <3 x float> %p0, i64 2
-  ; CHECK: %y0 = extractelement <3 x float> %p1, i64 0
-  ; CHECK: %y1 = extractelement <3 x float> %p1, i64 1
-  ; CHECK: %y2 = extractelement <3 x float> %p1, i64 2
-  ; CHECK: %0 = fmul float %x1, %y2
-  ; CHECK: %1 = fmul float %x2, %y1
-  ; CHECK: %hlsl.cross1 = fsub float %0, %1
-  ; CHECK: %2 = fmul float %x2, %y0
-  ; CHECK: %3 = fmul float %x0, %y2
-  ; CHECK: %hlsl.cross2 = fsub float %2, %3
-  ; CHECK: %4 = fmul float %x0, %y1
-  ; CHECK: %5 = fmul float %x1, %y0
-  ; CHECK: %hlsl.cross3 = fsub float %4, %5
-  ; CHECK: %6 = insertelement <3 x float> undef, float %hlsl.cross1, i64 0
-  ; CHECK: %7 = insertelement <3 x float> %6, float %hlsl.cross2, i64 1
-  ; CHECK: %8 = insertelement <3 x float> %7, float %hlsl.cross3, i64 2
-  ; CHECK: ret <3 x float> %8
-  %hlsl.cross = call <3 x float> @llvm.dx.cross.v3f32(<3 x float> %p0, <3 x float> %p1)
-  ret <3 x float> %hlsl.cross
-}
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
+
+; Make sure dxil operation function calls for cross are generated for half/float.
+
+declare <3 x half> @llvm.dx.cross.v3f16(<3 x half>, <3 x half>)
+declare <3 x float> @llvm.dx.cross.v3f32(<3 x float>, <3 x float>)
+
+define noundef <3 x half> @test_cross_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+  ; CHECK: %x0 = extractelement <3 x half> %p0, i64 0
+  ; CHECK: %x1 = extractelement <3 x half> %p0, i64 1
+  ; CHECK: %x2 = extractelement <3 x half> %p0, i64 2
+  ; CHECK: %y0 = extractelement <3 x half> %p1, i64 0
+  ; CHECK: %y1 = extractelement <3 x half> %p1, i64 1
+  ; CHECK: %y2 = extractelement <3 x half> %p1, i64 2
+  ; CHECK: %0 = fmul half %x1, %y2
+  ; CHECK: %1 = fmul half %x2, %y1
+  ; CHECK: %hlsl.cross1 = fsub half %0, %1
+  ; CHECK: %2 = fmul half %x2, %y0
+  ; CHECK: %3 = fmul half %x0, %y2
+  ; CHECK: %hlsl.cross2 = fsub half %2, %3
+  ; CHECK: %4 = fmul half %x0, %y1
+  ; CHECK: %5 = fmul half %x1, %y0
+  ; CHECK: %hlsl.cross3 = fsub half %4, %5
+  ; CHECK: %6 = insertelement <3 x half> undef, half %hlsl.cross1, i64 0
+  ; CHECK: %7 = insertelement <3 x half> %6, half %hlsl.cross2, i64 1
+  ; CHECK: %8 = insertelement <3 x half> %7, half %hlsl.cross3, i64 2
+  ; CHECK: ret <3 x half> %8
+  %hlsl.cross = call <3 x half> @llvm.dx.cross.v3f16(<3 x half> %p0, <3 x half> %p1)
+  ret <3 x half> %hlsl.cross
+}
+
+define noundef <3 x float> @test_cross_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+  ; CHECK: %x0 = extractelement <3 x float> %p0, i64 0
+  ; CHECK: %x1 = extractelement <3 x float> %p0, i64 1
+  ; CHECK: %x2 = extractelement <3 x float> %p0, i64 2
+  ; CHECK: %y0 = extractelement <3 x float> %p1, i64 0
+  ; CHECK: %y1 = extractelement <3 x float> %p1, i64 1
+  ; CHECK: %y2 = extractelement <3 x float> %p1, i64 2
+  ; CHECK: %0 = fmul float %x1, %y2
+  ; CHECK: %1 = fmul float %x2, %y1
+  ; CHECK: %hlsl.cross1 = fsub float %0, %1
+  ; CHECK: %2 = fmul float %x2, %y0
+  ; CHECK: %3 = fmul float %x0, %y2
+  ; CHECK: %hlsl.cross2 = fsub float %2, %3
+  ; CHECK: %4 = fmul float %x0, %y1
+  ; CHECK: %5 = fmul float %x1, %y0
+  ; CHECK: %hlsl.cross3 = fsub float %4, %5
+  ; CHECK: %6 = insertelement <3 x float> undef, float %hlsl.cross1, i64 0
+  ; CHECK: %7 = insertelement <3 x float> %6, float %hlsl.cross2, i64 1
+  ; CHECK: %8 = insertelement <3 x float> %7, float %hlsl.cross3, i64 2
+  ; CHECK: ret <3 x float> %8
+  %hlsl.cross = call <3 x float> @llvm.dx.cross.v3f32(<3 x float> %p0, <3 x float> %p1)
+  ret <3 x float> %hlsl.cross
+}
diff --git a/llvm/test/CodeGen/DirectX/normalize.ll b/llvm/test/CodeGen/DirectX/normalize.ll
index de106be12437..2aba9d5f74d7 100644
--- a/llvm/test/CodeGen/DirectX/normalize.ll
+++ b/llvm/test/CodeGen/DirectX/normalize.ll
@@ -1,112 +1,112 @@
-; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
-; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
-
-; Make sure dxil operation function calls for normalize are generated for half/float.
-
-declare half @llvm.dx.normalize.f16(half)
-declare <2 x half> @llvm.dx.normalize.v2f16(<2 x half>)
-declare <3 x half> @llvm.dx.normalize.v3f16(<3 x half>)
-declare <4 x half> @llvm.dx.normalize.v4f16(<4 x half>)
-
-declare float @llvm.dx.normalize.f32(float)
-declare <2 x float> @llvm.dx.normalize.v2f32(<2 x float>)
-declare <3 x float> @llvm.dx.normalize.v3f32(<3 x float>)
-declare <4 x float> @llvm.dx.normalize.v4f32(<4 x float>)
-
-define noundef half @test_normalize_half(half noundef %p0) {
-entry:
-  ; CHECK: fdiv half %p0, %p0
-  %hlsl.normalize = call half @llvm.dx.normalize.f16(half %p0)
-  ret half %hlsl.normalize
-}
-
-define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) {
-entry:
-  ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
-  ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth2]])
-  ; CHECK: [[splatinserth2:%.*]] = insertelement <2 x half> poison, half [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] = shufflevector <2 x half> [[splatinserth2]], <2 x half> poison, <2 x i32> zeroinitializer
-  ; CHECK: fmul <2 x half> %p0, [[splat]]
-
-  %hlsl.normalize = call <2 x half> @llvm.dx.normalize.v2f16(<2 x half> %p0)
-  ret <2 x half> %hlsl.normalize
-}
-
-define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) {
-entry:
-  ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
-  ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth3]])
-  ; CHECK: [[splatinserth3:%.*]] = insertelement <3 x half> poison, half [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] shufflevector <3 x half> [[splatinserth3]], <3 x half> poison, <3 x i32> zeroinitializer
-  ; CHECK: fmul <3 x half> %p0, %.splat
-
-  %hlsl.normalize = call <3 x half> @llvm.dx.normalize.v3f16(<3 x half> %p0)
-  ret <3 x half> %hlsl.normalize
-}
-
-define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) {
-entry:
-  ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
-  ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth4]])
-  ; CHECK: [[splatinserth4:%.*]] = insertelement <4 x half> poison, half [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] shufflevector <4 x half> [[splatinserth4]], <4 x half> poison, <4 x i32> zeroinitializer
-  ; CHECK: fmul <4 x half> %p0, %.splat
-
-  %hlsl.normalize = call <4 x half> @llvm.dx.normalize.v4f16(<4 x half> %p0)
-  ret <4 x half> %hlsl.normalize
-}
-
-define noundef float @test_normalize_float(float noundef %p0) {
-entry:
-  ; CHECK: fdiv float %p0, %p0
-  %hlsl.normalize = call float @llvm.dx.normalize.f32(float %p0)
-  ret float %hlsl.normalize
-}
-
-define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) {
-entry:
-  ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
-  ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf2]])
-  ; CHECK: [[splatinsertf2:%.*]] = insertelement <2 x float> poison, float [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] shufflevector <2 x float> [[splatinsertf2]], <2 x float> poison, <2 x i32> zeroinitializer
-  ; CHECK: fmul <2 x float> %p0, %.splat
-
-  %hlsl.normalize = call <2 x float> @llvm.dx.normalize.v2f32(<2 x float> %p0)
-  ret <2 x float> %hlsl.normalize
-}
-
-define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) {
-entry:
-  ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
-  ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf3]])
-  ; CHECK: [[splatinsertf3:%.*]] = insertelement <3 x float> poison, float [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] shufflevector <3 x float> [[splatinsertf3]], <3 x float> poison, <3 x i32> zeroinitializer
-  ; CHECK: fmul <3 x float> %p0, %.splat
-
-  %hlsl.normalize = call <3 x float> @llvm.dx.normalize.v3f32(<3 x float> %p0)
-  ret <3 x float> %hlsl.normalize
-}
-
-define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) {
-entry:
-  ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
-  ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
-  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]])
-  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf4]])
-  ; CHECK: [[splatinsertf4:%.*]] = insertelement <4 x float> poison, float [[rsqrt]], i64 0
-  ; CHECK: [[splat:%.*]] shufflevector <4 x float> [[splatinsertf4]], <4 x float> poison, <4 x i32> zeroinitializer
-  ; CHECK: fmul <4 x float> %p0, %.splat
-
-  %hlsl.normalize = call <4 x float> @llvm.dx.normalize.v4f32(<4 x float> %p0)
-  ret <4 x float> %hlsl.normalize
-}
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+; Make sure dxil operation function calls for normalize are generated for half/float.
+
+declare half @llvm.dx.normalize.f16(half)
+declare <2 x half> @llvm.dx.normalize.v2f16(<2 x half>)
+declare <3 x half> @llvm.dx.normalize.v3f16(<3 x half>)
+declare <4 x half> @llvm.dx.normalize.v4f16(<4 x half>)
+
+declare float @llvm.dx.normalize.f32(float)
+declare <2 x float> @llvm.dx.normalize.v2f32(<2 x float>)
+declare <3 x float> @llvm.dx.normalize.v3f32(<3 x float>)
+declare <4 x float> @llvm.dx.normalize.v4f32(<4 x float>)
+
+define noundef half @test_normalize_half(half noundef %p0) {
+entry:
+  ; CHECK: fdiv half %p0, %p0
+  %hlsl.normalize = call half @llvm.dx.normalize.f16(half %p0)
+  ret half %hlsl.normalize
+}
+
+define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) {
+entry:
+  ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
+  ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth2]])
+  ; CHECK: [[splatinserth2:%.*]] = insertelement <2 x half> poison, half [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] = shufflevector <2 x half> [[splatinserth2]], <2 x half> poison, <2 x i32> zeroinitializer
+  ; CHECK: fmul <2 x half> %p0, [[splat]]
+
+  %hlsl.normalize = call <2 x half> @llvm.dx.normalize.v2f16(<2 x half> %p0)
+  ret <2 x half> %hlsl.normalize
+}
+
+define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) {
+entry:
+  ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
+  ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth3]])
+  ; CHECK: [[splatinserth3:%.*]] = insertelement <3 x half> poison, half [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] shufflevector <3 x half> [[splatinserth3]], <3 x half> poison, <3 x i32> zeroinitializer
+  ; CHECK: fmul <3 x half> %p0, %.splat
+
+  %hlsl.normalize = call <3 x half> @llvm.dx.normalize.v3f16(<3 x half> %p0)
+  ret <3 x half> %hlsl.normalize
+}
+
+define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) {
+entry:
+  ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
+  ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth4]])
+  ; CHECK: [[splatinserth4:%.*]] = insertelement <4 x half> poison, half [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] shufflevector <4 x half> [[splatinserth4]], <4 x half> poison, <4 x i32> zeroinitializer
+  ; CHECK: fmul <4 x half> %p0, %.splat
+
+  %hlsl.normalize = call <4 x half> @llvm.dx.normalize.v4f16(<4 x half> %p0)
+  ret <4 x half> %hlsl.normalize
+}
+
+define noundef float @test_normalize_float(float noundef %p0) {
+entry:
+  ; CHECK: fdiv float %p0, %p0
+  %hlsl.normalize = call float @llvm.dx.normalize.f32(float %p0)
+  ret float %hlsl.normalize
+}
+
+define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) {
+entry:
+  ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
+  ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf2]])
+  ; CHECK: [[splatinsertf2:%.*]] = insertelement <2 x float> poison, float [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] shufflevector <2 x float> [[splatinsertf2]], <2 x float> poison, <2 x i32> zeroinitializer
+  ; CHECK: fmul <2 x float> %p0, %.splat
+
+  %hlsl.normalize = call <2 x float> @llvm.dx.normalize.v2f32(<2 x float> %p0)
+  ret <2 x float> %hlsl.normalize
+}
+
+define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) {
+entry:
+  ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
+  ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf3]])
+  ; CHECK: [[splatinsertf3:%.*]] = insertelement <3 x float> poison, float [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] shufflevector <3 x float> [[splatinsertf3]], <3 x float> poison, <3 x i32> zeroinitializer
+  ; CHECK: fmul <3 x float> %p0, %.splat
+
+  %hlsl.normalize = call <3 x float> @llvm.dx.normalize.v3f32(<3 x float> %p0)
+  ret <3 x float> %hlsl.normalize
+}
+
+define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) {
+entry:
+  ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+  ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+  ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]])
+  ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf4]])
+  ; CHECK: [[splatinsertf4:%.*]] = insertelement <4 x float> poison, float [[rsqrt]], i64 0
+  ; CHECK: [[splat:%.*]] shufflevector <4 x float> [[splatinsertf4]], <4 x float> poison, <4 x i32> zeroinitializer
+  ; CHECK: fmul <4 x float> %p0, %.splat
+
+  %hlsl.normalize = call <4 x float> @llvm.dx.normalize.v4f32(<4 x float> %p0)
+  ret <4 x float> %hlsl.normalize
+}
diff --git a/llvm/test/CodeGen/DirectX/normalize_error.ll b/llvm/test/CodeGen/DirectX/normalize_error.ll
index 3041d2ecdd92..35a91c0cdc24 100644
--- a/llvm/test/CodeGen/DirectX/normalize_error.ll
+++ b/llvm/test/CodeGen/DirectX/normalize_error.ll
@@ -1,10 +1,10 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation normalize does not support double overload type
-; CHECK: Cannot create Dot2 operation: Invalid overload type
-
-define noundef <2 x double> @test_normalize_double2(<2 x double> noundef %p0) {
-entry:
-  %hlsl.normalize = call <2 x double> @llvm.dx.normalize.v2f32(<2 x double> %p0)
-  ret <2 x double> %hlsl.normalize
-}
+; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation normalize does not support double overload type
+; CHECK: Cannot create Dot2 operation: Invalid overload type
+
+define noundef <2 x double> @test_normalize_double2(<2 x double> noundef %p0) {
+entry:
+  %hlsl.normalize = call <2 x double> @llvm.dx.normalize.v2f32(<2 x double> %p0)
+  ret <2 x double> %hlsl.normalize
+}
diff --git a/llvm/test/CodeGen/DirectX/step.ll b/llvm/test/CodeGen/DirectX/step.ll
index 6a9b5bf71da8..1c9894026c62 100644
--- a/llvm/test/CodeGen/DirectX/step.ll
+++ b/llvm/test/CodeGen/DirectX/step.ll
@@ -1,78 +1,78 @@
-; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefix=CHECK
-; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
-
-; Make sure dxil operation function calls for step are generated for half/float.
-
-declare half @llvm.dx.step.f16(half, half)
-declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
-declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
-declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
-
-declare float @llvm.dx.step.f32(float, float)
-declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
-declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
-declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
-
-define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt half %p1, %p0
-  ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
-  %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
-  ret half %hlsl.step
-}
-
-define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
-  ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
-  %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
-  ret <2 x half> %hlsl.step
-}
-
-define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
-  ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
-  %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
-  ret <3 x half> %hlsl.step
-}
-
-define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
-  ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
-  %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
-  ret <4 x half> %hlsl.step
-}
-
-define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt float %p1, %p0
-  ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
-  %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
-  ret float %hlsl.step
-}
-
-define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
-  ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
-  %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
-  ret <2 x float> %hlsl.step
-}
-
-define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
-  ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-  %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
-  ret <3 x float> %hlsl.step
-}
-
-define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
-entry:
-  ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
-  ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-  %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(<4 x float> %p0, <4 x float> %p1)
-  ret <4 x float> %hlsl.step
-}
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+; Make sure dxil operation function calls for step are generated for half/float.
+
+declare half @llvm.dx.step.f16(half, half)
+declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
+declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
+declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
+
+declare float @llvm.dx.step.f32(float, float)
+declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
+declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
+declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
+
+define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt half %p1, %p0
+  ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+  %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
+  ret half %hlsl.step
+}
+
+define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
+  ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
+  ret <2 x half> %hlsl.step
+}
+
+define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
+  ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
+  ret <3 x half> %hlsl.step
+}
+
+define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
+  ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
+  ret <4 x half> %hlsl.step
+}
+
+define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt float %p1, %p0
+  ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
+  %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
+  ret float %hlsl.step
+}
+
+define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
+  ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+  %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
+  ret <2 x float> %hlsl.step
+}
+
+define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
+  ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
+  ret <3 x float> %hlsl.step
+}
+
+define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
+  ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(<4 x float> %p0, <4 x float> %p1)
+  ret <4 x float> %hlsl.step
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll
index a0306bae4a22..bdbfc133efa2 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll
@@ -1,49 +1,49 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
-; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
-; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
-; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
-
-define noundef float @atan2_float(float noundef %a, float noundef %b) {
-entry:
-; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
-  %elt.atan2 = call float @llvm.atan2.f32(float %a, float %b)
-  ret float %elt.atan2
-}
-
-define noundef half @atan2_half(half noundef %a, half noundef %b) {
-entry:
-; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
-  %elt.atan2 = call half @llvm.atan2.f16(half %a, half %b)
-  ret half %elt.atan2
-}
-
-define noundef <4 x float> @atan2_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
-entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
-  %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %a, <4 x float> %b)
-  ret <4 x float> %elt.atan2
-}
-
-define noundef <4 x half> @atan2_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
-entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
-  %elt.atan2 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %a, <4 x half> %b)
-  ret <4 x half> %elt.atan2
-}
-
-declare half @llvm.atan2.f16(half, half)
-declare float @llvm.atan2.f32(float, float)
-declare <4 x half> @llvm.atan2.v4f16(<4 x half>, <4 x half>)
-declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @atan2_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
+  %elt.atan2 = call float @llvm.atan2.f32(float %a, float %b)
+  ret float %elt.atan2
+}
+
+define noundef half @atan2_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
+  %elt.atan2 = call half @llvm.atan2.f16(half %a, half %b)
+  ret half %elt.atan2
+}
+
+define noundef <4 x float> @atan2_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
+  %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %elt.atan2
+}
+
+define noundef <4 x half> @atan2_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]]
+  %elt.atan2 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %elt.atan2
+}
+
+declare half @llvm.atan2.f16(half, half)
+declare float @llvm.atan2.f32(float, float)
+declare <4 x half> @llvm.atan2.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
index 7c06c14bb968..2e0eb8c429ac 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
@@ -1,33 +1,33 @@
-; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; Make sure SPIRV operation function calls for cross are lowered correctly.
-
-; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
-; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
-; CHECK-DAG: %[[#vec3_float_16:]] = OpTypeVector %[[#float_16]] 3
-; CHECK-DAG: %[[#vec3_float_32:]] = OpTypeVector %[[#float_32]] 3
-
-define noundef <3 x half> @cross_half4(<3 x half> noundef %a, <3 x half> noundef %b) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec3_float_16]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_16]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_16]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
-  %hlsl.cross = call <3 x half> @llvm.spv.cross.v4f16(<3 x half> %a, <3 x half> %b)
-  ret <3 x half> %hlsl.cross
-}
-
-define noundef <3 x float> @cross_float4(<3 x float> noundef %a, <3 x float> noundef %b) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec3_float_32]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_32]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_32]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
-  %hlsl.cross = call <3 x float> @llvm.spv.cross.v4f32(<3 x float> %a, <3 x float> %b)
-  ret <3 x float> %hlsl.cross
-}
-
-declare <3 x half> @llvm.spv.cross.v4f16(<3 x half>, <3 x half>)
-declare <3 x float> @llvm.spv.cross.v4f32(<3 x float>, <3 x float>)
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for cross are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec3_float_16:]] = OpTypeVector %[[#float_16]] 3
+; CHECK-DAG: %[[#vec3_float_32:]] = OpTypeVector %[[#float_32]] 3
+
+define noundef <3 x half> @cross_half4(<3 x half> noundef %a, <3 x half> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec3_float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_16]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
+  %hlsl.cross = call <3 x half> @llvm.spv.cross.v4f16(<3 x half> %a, <3 x half> %b)
+  ret <3 x half> %hlsl.cross
+}
+
+define noundef <3 x float> @cross_float4(<3 x float> noundef %a, <3 x float> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec3_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_32]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
+  %hlsl.cross = call <3 x float> @llvm.spv.cross.v4f32(<3 x float> %a, <3 x float> %b)
+  ret <3 x float> %hlsl.cross
+}
+
+declare <3 x half> @llvm.spv.cross.v4f16(<3 x half>, <3 x half>)
+declare <3 x float> @llvm.spv.cross.v4f32(<3 x float>, <3 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
index df1ef3a7287c..b4a9d8e0664b 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
@@ -1,29 +1,29 @@
-; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; Make sure SPIRV operation function calls for length are lowered correctly.
-
-; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
-; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
-; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
-; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
-
-define noundef half @length_half4(<4 x half> noundef %a) {
-entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Length %[[#arg0]]
-  %hlsl.length = call half @llvm.spv.length.v4f16(<4 x half> %a)
-  ret half %hlsl.length
-}
-
-define noundef float @length_float4(<4 x float> noundef %a) {
-entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
-  ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Length %[[#arg0]]
-  %hlsl.length = call float @llvm.spv.length.v4f32(<4 x float> %a)
-  ret float %hlsl.length
-}
-
-declare half @llvm.spv.length.v4f16(<4 x half>)
-declare float @llvm.spv.length.v4f32(<4 x float>)
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for length are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef half @length_half4(<4 x half> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Length %[[#arg0]]
+  %hlsl.length = call half @llvm.spv.length.v4f16(<4 x half> %a)
+  ret half %hlsl.length
+}
+
+define noundef float @length_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Length %[[#arg0]]
+  %hlsl.length = call float @llvm.spv.length.v4f32(<4 x float> %a)
+  ret float %hlsl.length
+}
+
+declare half @llvm.spv.length.v4f16(<4 x half>)
+declare float @llvm.spv.length.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll
index 4659b5146e43..fa73b9c2a4d3 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll
@@ -1,31 +1,31 @@
-; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; Make sure SPIRV operation function calls for normalize are lowered correctly.
-
-; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
-; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
-; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
-; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
-
-define noundef <4 x half> @normalize_half4(<4 x half> noundef %a) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Normalize %[[#arg0]]
-  %hlsl.normalize = call <4 x half> @llvm.spv.normalize.v4f16(<4 x half> %a)
-  ret <4 x half> %hlsl.normalize
-}
-
-define noundef <4 x float> @normalize_float4(<4 x float> noundef %a) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Normalize %[[#arg0]]
-  %hlsl.normalize = call <4 x float> @llvm.spv.normalize.v4f32(<4 x float> %a)
-  ret <4 x float> %hlsl.normalize
-}
-
-declare <4 x half> @llvm.spv.normalize.v4f16(<4 x half>)
-declare <4 x float> @llvm.spv.normalize.v4f32(<4 x float>)
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for normalize are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef <4 x half> @normalize_half4(<4 x half> noundef %a) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Normalize %[[#arg0]]
+  %hlsl.normalize = call <4 x half> @llvm.spv.normalize.v4f16(<4 x half> %a)
+  ret <4 x half> %hlsl.normalize
+}
+
+define noundef <4 x float> @normalize_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Normalize %[[#arg0]]
+  %hlsl.normalize = call <4 x float> @llvm.spv.normalize.v4f32(<4 x float> %a)
+  ret <4 x float> %hlsl.normalize
+}
+
+declare <4 x half> @llvm.spv.normalize.v4f16(<4 x half>)
+declare <4 x float> @llvm.spv.normalize.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll
index 7c0ee9398d15..bb50d8c790f8 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll
@@ -1,33 +1,33 @@
-; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; Make sure SPIRV operation function calls for step are lowered correctly.
-
-; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
-; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
-; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
-; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
-; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
-
-define noundef <4 x half> @step_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
-  %hlsl.step = call <4 x half> @llvm.spv.step.v4f16(<4 x half> %a, <4 x half> %b)
-  ret <4 x half> %hlsl.step
-}
-
-define noundef <4 x float> @step_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
-entry:
-  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
-  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
-  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
-  %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(<4 x float> %a, <4 x float> %b)
-  ret <4 x float> %hlsl.step
-}
-
-declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>)
-declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>)
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for step are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef <4 x half> @step_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+  %hlsl.step = call <4 x half> @llvm.spv.step.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %hlsl.step
+}
+
+define noundef <4 x float> @step_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+  %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %hlsl.step
+}
+
+declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/Demangle/ms-placeholder-return-type.test b/llvm/test/Demangle/ms-placeholder-return-type.test
index a656400fe140..18038e636c8d 100644
--- a/llvm/test/Demangle/ms-placeholder-return-type.test
+++ b/llvm/test/Demangle/ms-placeholder-return-type.test
@@ -1,18 +1,18 @@
-; RUN: llvm-undname < %s | FileCheck %s
-
-; CHECK-NOT: Invalid mangled name
-
-?TestNonTemplateAuto@@YA@XZ
-; CHECK: __cdecl TestNonTemplateAuto(void)
-
-??$AutoT@X@@YA?A_PXZ
-; CHECK: auto __cdecl AutoT<void>(void)
-
-??$AutoT@X@@YA?B_PXZ
-; CHECK: auto const __cdecl AutoT<void>(void)
-
-??$AutoT@X@@YA?A_TXZ
-; CHECK: decltype(auto) __cdecl AutoT<void>(void)
-
-??$AutoT@X@@YA?B_TXZ
-; CHECK: decltype(auto) const __cdecl AutoT<void>(void)
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?TestNonTemplateAuto@@YA@XZ
+; CHECK: __cdecl TestNonTemplateAuto(void)
+
+??$AutoT@X@@YA?A_PXZ
+; CHECK: auto __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_PXZ
+; CHECK: auto const __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?A_TXZ
+; CHECK: decltype(auto) __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_TXZ
+; CHECK: decltype(auto) const __cdecl AutoT<void>(void)
diff --git a/llvm/test/FileCheck/.gitattributes b/llvm/test/FileCheck/.gitattributes
deleted file mode 100644
index ba27d7fad76d..000000000000
--- a/llvm/test/FileCheck/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-dos-style-eol.txt text eol=crlf
diff --git a/llvm/test/FileCheck/dos-style-eol.txt b/llvm/test/FileCheck/dos-style-eol.txt
index 52184f465c3f..4252aad4d3e7 100644
--- a/llvm/test/FileCheck/dos-style-eol.txt
+++ b/llvm/test/FileCheck/dos-style-eol.txt
@@ -1,11 +1,11 @@
-// Test for using FileCheck on DOS style end-of-line
-// This test was deliberately committed with DOS style end of line.
-// Don't change line endings!
-// RUN: FileCheck -input-file %s %s
-// RUN: FileCheck  --strict-whitespace -input-file %s %s
-
-LINE 1
-; CHECK: {{^}}LINE 1{{$}}
-
-LINE 2
+// Test for using FileCheck on DOS style end-of-line
+// This test was deliberately committed with DOS style end of line.
+// Don't change line endings!
+// RUN: FileCheck -input-file %s %s
+// RUN: FileCheck  --strict-whitespace -input-file %s %s
+
+LINE 1
+; CHECK: {{^}}LINE 1{{$}}
+
+LINE 2
 ; CHECK: {{^}}LINE 2{{$}}
 \ No newline at end of file
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 8c50d86489c9..7dcab6d807cf 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -11,8 +11,7 @@ define void @f1(ptr %A) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 93034f4dbe56..5496eed16e54 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -11,10 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -146,10 +143,7 @@ for.cond.cleanup:
 define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_if_then(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -310,10 +304,7 @@ for.cond.cleanup:
 define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_if_then_else(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -490,10 +481,7 @@ for.cond.cleanup:
 define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_nomask(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -548,11 +536,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ;
 ; TFFALLBACK-LABEL: @test_widen_nomask(
 ; TFFALLBACK-NEXT:  entry:
-; TFFALLBACK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFFALLBACK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFFALLBACK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; TFFALLBACK:       vector.ph:
 ; TFFALLBACK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFFALLBACK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; TFFALLBACK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
@@ -561,7 +544,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFFALLBACK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
 ; TFFALLBACK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TFFALLBACK:       vector.body:
-; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; TFFALLBACK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TFFALLBACK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; TFFALLBACK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
 ; TFFALLBACK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
@@ -569,12 +552,9 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFFALLBACK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
 ; TFFALLBACK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; TFFALLBACK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TFFALLBACK-NEXT:    br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
-; TFFALLBACK:       scalar.ph:
-; TFFALLBACK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT:    br label [[FOR_BODY:%.*]]
+; TFFALLBACK-NEXT:    br i1 [[TMP9]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; TFFALLBACK:       for.body:
-; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; TFFALLBACK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
 ; TFFALLBACK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
 ; TFFALLBACK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
 ; TFFALLBACK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
@@ -626,10 +606,7 @@ for.cond.cleanup:
 define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-LABEL: @test_widen_optmask(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
@@ -791,10 +768,7 @@ for.cond.cleanup:
 define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, double %m) #4 {
 ; TFNONE-LABEL: @test_widen_fmuladd_and_call(
 ; TFNONE-NEXT:  entry:
-; TFNONE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; TFNONE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TFNONE:       vector.ph:
 ; TFNONE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; TFNONE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 0e95d742092e..d18cdc1ae617 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -10,8 +10,7 @@ define void @test_invar_gep(ptr %dst) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 94b90aa3cfb3..1d150141e625 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -757,8 +757,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
index 4a2f9d07ed91..4a3bc4679bba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
@@ -7,10 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
 ; WIDE-LABEL: @test_widen(
 ; WIDE-NEXT:  entry:
-; WIDE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; WIDE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; WIDE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]]
-; WIDE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; WIDE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; WIDE:       vector.ph:
 ; WIDE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; WIDE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
index 2dd47d5c1ea8..322a6c16871a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
@@ -422,28 +422,37 @@ define void @no_high_lmul_or_interleave(ptr %p) {
 ; IF-EVL-NEXT:  entry:
 ; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL:       vector.ph:
+; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP7]], 1
+; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 3002, [[TMP1]]
+; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
+; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IF-EVL:       vector.body:
 ; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
-; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; IF-EVL-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 3001, i64 3001, i64 3001, i64 3001>
+; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 3002, [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[AVL]], 1024
+; IF-EVL-NEXT:    [[SAFE_AVL:%.*]] = select i1 [[TMP9]], i64 [[AVL]], i64 1024
+; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[SAFE_AVL]], i32 1, i1 true)
+; IF-EVL-NEXT:    [[TMP0:%.*]] = add i64 [[EVL_BASED_IV]], 0
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
 ; IF-EVL-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
-; IF-EVL-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[TMP3]], i32 32, <4 x i1> [[TMP1]], <4 x i64> poison)
+; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr align 32 [[TMP3]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = add i64 [[TMP0]], 1024
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP4]]
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
-; IF-EVL-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_MASKED_LOAD]], ptr [[TMP6]], i32 32, <4 x i1> [[TMP1]])
-; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; IF-EVL-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 3004
-; IF-EVL-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; IF-EVL-NEXT:    call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP6]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP10]])
+; IF-EVL-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
+; IF-EVL-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; IF-EVL:       middle.block:
 ; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; IF-EVL:       scalar.ph:
-; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 3004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; IF-EVL-NEXT:    br label [[LOOP:%.*]]
 ; IF-EVL:       loop:
 ; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index 383b62b368ef..330cdeaeb7c2 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1659,6 +1659,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i64 %1
 }
 
+; FIXME: %indvars.iv.next is poison on first iteration due to sub nuw 0, 1.
 define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK-LABEL: define i32 @fcmp_0_sub_select1(
 ; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -1668,8 +1669,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK:       [[FOR_HEADER]]:
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[ZEXT]]
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index b3ec3e8f0f3c..5e65832aba8c 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -415,6 +415,7 @@ exit:
 
 ; Test case to make sure that uses of versioned strides of type i1 are properly
 ; extended. From https://github.com/llvm/llvm-project/issues/91369.
+; TODO: Better check (udiv i64 15, %g.64) after checking if %g == 1.
 define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-LABEL: define void @zext_of_i1_stride(
 ; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -423,8 +424,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT:    [[G_64:%.*]] = zext i1 [[G]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 15, [[G_64]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK:       vector.scevcheck:
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
 ; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
@@ -489,8 +489,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = udiv i64 [[TMP0]], [[G_64]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK:       vector.scevcheck:
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
 ; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index 212177522409..354791ddd6de 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -43,32 +43,32 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
 ; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP19]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> [[TMP34]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
@@ -86,19 +86,19 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
 ; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]]
-; CHECK-NEXT:    [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 2, i32 5, i32 6, i32 17, i32 18, i32 21, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 3, i32 4, i32 7, i32 16, i32 19, i32 20, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 3, i32 4, i32 7, i32 16, i32 19, i32 20, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 2, i32 5, i32 6, i32 17, i32 18, i32 21, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]]
 ; CHECK-NEXT:    [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]]
-; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
 ; CHECK-NEXT:    [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
-; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 20, i32 16, i32 21, i32 17, i32 22, i32 18, i32 23, i32 19>
 ; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
 ; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5f0b16048d40..b16164c4e5ff 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -1215,26 +1215,26 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]]
 ; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4
 ; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
-; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[P1]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[P2]], align 1
+; CHECK-NEXT:    [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT:    [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32>
-; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -1242,14 +1242,14 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]]
-; CHECK-NEXT:    [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32>
-; CHECK-NEXT:    [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
 ; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -1262,7 +1262,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 ; CHECK-NEXT:    [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = sub nsw <16 x i32> [[TMP43]], [[TMP44]]
-; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
+; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 18, i32 22, i32 26, i32 30, i32 1, i32 5, i32 9, i32 13, i32 16, i32 20, i32 24, i32 28>
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP49:%.*]] = add nsw <16 x i32> [[TMP47]], [[TMP48]]
 ; CHECK-NEXT:    [[TMP50:%.*]] = sub nsw <16 x i32> [[TMP47]], [[TMP48]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
index fffa626cae0d..c431b058f0d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -17,18 +17,17 @@
 
 define void @s116_modified(ptr %a) {
 ; CHECK-LABEL: @s116_modified(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 1
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
+; CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds float, ptr [[GEP1:%.*]], i64 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[GEP1]], i64 3
 ; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP3]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[LD0]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP2]], i64 2)
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP6]], <4 x i32> <i32 1, i32 1, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[A]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[GEP1]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %gep1 = getelementptr inbounds float, ptr %a, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll
index 833bc56c4ec6..2191d04cd797 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll
@@ -7,8 +7,7 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) {
 ; NON-POWER-OF-2-NEXT:  entry:
 ; NON-POWER-OF-2-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4
 ; NON-POWER-OF-2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2
-; NON-POWER-OF-2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
-; NON-POWER-OF-2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> <i32 3, i32 4, i32 2>
+; NON-POWER-OF-2-NEXT:    [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0)
 ; NON-POWER-OF-2-NEXT:    [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer)
 ; NON-POWER-OF-2-NEXT:    store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4
 ; NON-POWER-OF-2-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
index 757d0b1708b6..234b65803238 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
@@ -11,19 +11,21 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x float>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[Y0:%.*]] = getelementptr i8, ptr [[RC21]], i64 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[Y0]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[I7]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x float>, ptr [[RC21]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 3
-; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP8]], i64 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP10]], <2 x float> [[TMP11]], i64 0)
 ; CHECK-NEXT:    [[TMP12:%.*]] = fcmp olt <4 x float> [[TMP13]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fcmp olt <4 x float> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP5]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP12]], <4 x float> zeroinitializer, <4 x float> [[TMP15]]
-; CHECK-NEXT:    store <4 x float> [[TMP16]], ptr [[RC21]], align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    store <4 x float> [[TMP27]], ptr [[RC21]], align 4
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       entry.if.end72_crit_edge:
 ; CHECK-NEXT:    br label [[IF_END72:%.*]]
@@ -46,8 +48,7 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul <4 x i32> [[TMP23]], [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = sitofp <4 x i32> [[TMP25]] to <4 x float>
-; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:    store <4 x float> [[TMP27]], ptr [[RC21]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP26]], ptr [[RC21]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index 72e29839230e..c9ff2d6426d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -318,22 +318,14 @@ entry:
 define float @f(ptr nocapture readonly %x) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
 ; CHECK-NEXT:    ret float [[OP_RDX]]
 ;
 ; THRESHOLD-LABEL: @f(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
 ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
 ;
   entry:
@@ -606,18 +598,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
 ; CHECK-LABEL: @loadadd31(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
 ; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
-; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
 ; CHECK-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
@@ -627,18 +615,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
 ; THRESHOLD-LABEL: @loadadd31(
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
 ; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
 ; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
 ; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
-; THRESHOLD-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
-; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
+; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
 ; THRESHOLD-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
 ; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
 ; THRESHOLD-NEXT:    [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index a7201e776fb4..0bc91d42b0f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1013,11 +1013,11 @@ define i32 @maxi8_wrong_parent(i32) {
 ; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    br label [[PP:%.*]]
 ; THRESH:       pp:
-; THRESH-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; THRESH-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
-; THRESH-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP4]], i64 4)
-; THRESH-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP2]], i64 0)
-; THRESH-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP3]], i64 2)
+; THRESH-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
+; THRESH-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP4]], i64 4)
+; THRESH-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6)
 ; THRESH-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]])
 ; THRESH-NEXT:    ret i32 [[TMP8]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
index 47dd84c7f6e9..4898111960c0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
@@ -7,10 +7,11 @@ define void @e(ptr %c, i64 %0) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[C]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 112
-; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 104
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x ptr> [[TMP18]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <6 x ptr> poison, ptr [[TMP2]], i32 2
@@ -18,7 +19,7 @@ define void @e(ptr %c, i64 %0) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP8]], <2 x ptr> [[TMP4]], i64 0)
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP9]], <2 x ptr> [[TMP6]], i64 4)
 ; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint <6 x ptr> [[TMP10]] to <6 x i64>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5>
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
index 93258f2975f3..2623b7689f4f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
@@ -8,197 +8,6 @@
 ; into bb1, vectorizing all the way to the broadcast load at the top.
 ; The stores in bb1 are external to this tree, but they are vectorizable and are
 ; in reverse order.
-define void @rotate_with_external_users(ptr %A, ptr %ptr) {
-; CHECK-LABEL: @rotate_with_external_users(
-; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 2.200000e+00, double 1.100000e+00>
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 2.200000e+00, double 1.100000e+00>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[A:%.*]], align 8
-; CHECK-NEXT:    br label [[BB2:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], <double 4.400000e+00, double 3.300000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT:    [[SEED:%.*]] = fcmp ogt double [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    ret void
-;
-bb1:
-  %ld = load double, ptr undef
-
-  %add1 = fadd double %ld, 1.1
-  %add2 = fadd double %ld, 2.2
-
-  %mul1 = fmul double %add1, 1.1
-  %mul2 = fmul double %add2, 2.2
-
-  ; Thes are external vectorizable stores with operands in reverse order.
-  %ptrA2 = getelementptr inbounds double, ptr %A, i64 1
-  store double %mul2, ptr %A
-  store double %mul1, ptr %ptrA2
-  br label %bb2
-
-bb2:
-  %add3 = fadd double %mul1, 3.3
-  %add4 = fadd double %mul2, 4.4
-  %seed = fcmp ogt double %add3, %add4
-  ret void
-}
-
-; This checks that non-consecutive external users are skipped.
-define void @non_consecutive_external_users(ptr %A, ptr %ptr) {
-; CHECK-LABEL: @non_consecutive_external_users(
-; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x double> [[SHUFFLE]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x double> [[TMP1]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x double> [[TMP2]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
-; CHECK-NEXT:    [[PTRA4:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 3
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 3
-; CHECK-NEXT:    store double [[TMP4]], ptr [[A]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP3]], i32 2
-; CHECK-NEXT:    store double [[TMP5]], ptr [[A]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP3]], i32 1
-; CHECK-NEXT:    store double [[TMP6]], ptr [[PTRA4]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP3]], i32 0
-; CHECK-NEXT:    store double [[TMP7]], ptr [[PTRA4]], align 8
-; CHECK-NEXT:    br label [[SEED_LOOP:%.*]]
-; CHECK:       seed_loop:
-; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x double> [ [[TMP3]], [[BB1:%.*]] ], [ zeroinitializer, [[SEED_LOOP]] ]
-; CHECK-NEXT:    br label [[SEED_LOOP]]
-;
-bb1:
-  %ld = load double, ptr undef
-
-  %add5 = fadd double %ld, 1.1
-  %add6 = fadd double %ld, 2.2
-  %add7 = fadd double %ld, 3.3
-  %add8 = fadd double %ld, 4.4
-
-  %add1 = fadd double %add5, 1.1
-  %add2 = fadd double %add6, 2.2
-  %add3 = fadd double %add7, 3.3
-  %add4 = fadd double %add8, 4.4
-
-  %mul1 = fmul double %add1, 1.1
-  %mul2 = fmul double %add2, 2.2
-  %mul3 = fmul double %add3, 3.3
-  %mul4 = fmul double %add4, 4.4
-
-  ; External non-consecutive stores.
-  %ptrA4 = getelementptr inbounds double, ptr %A, i64 3
-  store double %mul4, ptr %A
-  store double %mul3, ptr %A
-  store double %mul2, ptr %ptrA4
-  store double %mul1, ptr %ptrA4
-  br label %seed_loop
-
-seed_loop:
-  %phi1 = phi double [ %mul1, %bb1 ], [ 0.0, %seed_loop ]
-  %phi2 = phi double [ %mul2, %bb1 ], [ 0.0, %seed_loop ]
-  %phi3 = phi double [ %mul3, %bb1 ], [ 0.0, %seed_loop ]
-  %phi4 = phi double [ %mul4, %bb1 ], [ 0.0, %seed_loop ]
-  br label %seed_loop
-}
-
-; We have to be careful when the tree contains add/sub patterns that could be
-; combined into a single addsub instruction. Reordering can block the pattern.
-define void @addsub_and_external_users(ptr %A, ptr %ptr) {
-; CHECK-LABEL: @addsub_and_external_users(
-; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.100000e+00, double 2.200000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.100000e+00, double 3.200000e+00>
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    store <2 x double> [[SHUFFLE1]], ptr [[A:%.*]], align 8
-; CHECK-NEXT:    br label [[BB2:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.100000e+00, double 4.200000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT:    [[SEED:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    ret void
-;
-bb1:
-  %ld = load double, ptr undef
-
-  %sub1 = fsub double %ld, 1.1
-  %add2 = fadd double %ld, 1.2
-
-  %div1 = fdiv double %sub1, 2.1
-  %div2 = fdiv double %add2, 2.2
-
-  %mul1 = fmul double %div1, 3.1
-  %mul2 = fmul double %div2, 3.2
-
-  ; These are external vectorizable stores with operands in reverse order.
-  %ptrA1 = getelementptr inbounds double, ptr %A, i64 1
-  store double %mul2, ptr %A
-  store double %mul1, ptr %ptrA1
-  br label %bb2
-
-bb2:
-  %addS1 = fadd double %mul1, 4.1
-  %addS2 = fadd double %mul2, 4.2
-  %seed = fcmp ogt double %addS1, %addS2
-  ret void
-}
-
-; This contains a sub/add bundle, reordering it will make it better.
-define void @subadd_and_external_users(ptr %A, ptr %ptr) {
-; CHECK-LABEL: @subadd_and_external_users(
-; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8
-; CHECK-NEXT:    br label [[BB2:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.200000e+00, double 4.100000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT:    [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP8]]
-; CHECK-NEXT:    ret void
-;
-bb1:
-  %ld = load double, ptr undef
-
-  %add1 = fadd double %ld, 1.1
-  %sub2 = fsub double %ld, 1.2
-
-  %div1 = fdiv double %add1, 2.1
-  %div2 = fdiv double %sub2, 2.2
-
-  %mul1 = fmul double %div1, 3.1
-  %mul2 = fmul double %div2, 3.2
-
-  ; These are external vectorizable stores with operands in reverse order.
-  %ptrA1 = getelementptr inbounds double, ptr %A, i64 1
-  store double %mul2, ptr %A
-  store double %mul1, ptr %ptrA1
-  br label %bb2
-
-bb2:
-  %addS1 = fadd double %mul1, 4.1
-  %addS2 = fadd double %mul2, 4.2
-  %seed = fcmp ogt double %addS1, %addS2
-  ret void
-}
-
 define void @alt_but_not_addsub_and_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @alt_but_not_addsub_and_external_users(
 ; CHECK-NEXT:  bb1:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll
index fd3c1a57aff3..a821362a883a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll
@@ -7,8 +7,7 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4
 ; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2
-; NON-POW2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
-; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> <i32 3, i32 4, i32 2>
+; NON-POW2-NEXT:    [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0)
 ; NON-POW2-NEXT:    [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer)
 ; NON-POW2-NEXT:    store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4
 ; NON-POW2-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
index e1b091cc6fcd..9719e60a6a69 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
@@ -8,18 +8,18 @@ define void @test(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr null, align 4
 ; CHECK-NEXT:    [[ARRAYIDX120:%.*]] = getelementptr [4 x float], ptr [[B:%.*]], i64 0, i64 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAYIDX120]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 2
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr null, align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP6]], <2 x float> [[TMP1]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    [[TMP10:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul <4 x float> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], zeroinitializer
 ; CHECK-NEXT:    store <4 x float> [[TMP12]], ptr [[RESULT]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
new file mode 100644
index 000000000000..51b635837d3b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define i32 @test(i32 %n) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369>
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    ret i32 [[RES1]]
+;
+entry:
+  %n1 = add i32 %n, 1
+  %zn1 = zext nneg i32 %n1 to i64
+  %m1 = mul nuw nsw i64 %zn1, 273837369
+  %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true)
+  %t1 = trunc i64 %a1 to i32
+  %n2 = add i32 %n, 2
+  %zn2 = zext nneg i32 %n2 to i64
+  %m2 = mul nuw nsw i64 %zn2, 273837369
+  %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true)
+  %t2 = trunc i64 %a2 to i32
+  %res1 = add i32 %t1, %t2
+  ret i32 %res1
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll
new file mode 100644
index 000000000000..6cd44c297882
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define i32 @test(i1 %.b, i8 %conv18, i32 %k.promoted61) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i1 [[DOTB:%.*]], i8 [[CONV18:%.*]], i32 [[K_PROMOTED61:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[DOTB]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8>
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze <4 x i8> [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i8> poison, i8 [[CONV18]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt <4 x i8> [[TMP7]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i8> zeroinitializer, <4 x i8> [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub nuw <4 x i8> [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP5]], <4 x i8> [[TMP9]], <4 x i8> [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = zext <4 x i8> [[TMP13]] to <4 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP14]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i32 [[TMP15]], [[K_PROMOTED61]]
+; CHECK-NEXT:    ret i32 [[OP_RDX]]
+;
+  %not..b79 = xor i1 %.b, true
+  %3 = zext i1 %not..b79 to i8
+  %cmp.i51 = icmp eq i8 %3, 0
+  %cond.i55 = freeze i8 %3
+  %.cmp = icmp ugt i8 %cond.i55, %conv18
+  %.urem = select i1 %.cmp, i8 0, i8 %cond.i55
+  %4 = sub nuw i8 %conv18, %.urem
+  %cond.in.i = select i1 %cmp.i51, i8 %conv18, i8 %4
+  %not..b80 = xor i1 %.b, true
+  %5 = zext i1 %not..b80 to i8
+  %cmp.i51.1 = icmp eq i8 %5, 0
+  %cond.i55.1 = freeze i8 %5
+  %.cmp.1 = icmp ugt i8 %cond.i55.1, %conv18
+  %.urem.1 = select i1 %.cmp.1, i8 0, i8 %cond.i55.1
+  %6 = sub nuw i8 %conv18, %.urem.1
+  %cond.in.i.1 = select i1 %cmp.i51.1, i8 %conv18, i8 %6
+  %not..b81 = xor i1 %.b, true
+  %7 = zext i1 %not..b81 to i8
+  %cmp.i51.2 = icmp eq i8 %7, 0
+  %cond.i55.2 = freeze i8 %7
+  %.cmp.2 = icmp ugt i8 %cond.i55.2, %conv18
+  %.urem.2 = select i1 %.cmp.2, i8 0, i8 %cond.i55.2
+  %8 = sub nuw i8 %conv18, %.urem.2
+  %cond.in.i.2 = select i1 %cmp.i51.2, i8 %conv18, i8 %8
+  %not..b = xor i1 %.b, true
+  %9 = zext i1 %not..b to i8
+  %cmp.i51.3 = icmp eq i8 %9, 0
+  %cond.i55.3 = freeze i8 %9
+  %.cmp.3 = icmp ugt i8 %cond.i55.3, %conv18
+  %.urem.3 = select i1 %.cmp.3, i8 0, i8 %cond.i55.3
+  %10 = sub nuw i8 %conv18, %.urem.3
+  %cond.in.i.3 = select i1 %cmp.i51.3, i8 %conv18, i8 %10
+  %conv26 = zext nneg i8 %cond.in.i to i32
+  %or = or i32 %k.promoted61, %conv26
+  %conv26.1 = zext nneg i8 %cond.in.i.1 to i32
+  %or.1 = or i32 %or, %conv26.1
+  %conv26.2 = zext nneg i8 %cond.in.i.2 to i32
+  %or.2 = or i32 %or.1, %conv26.2
+  %conv26.3 = zext nneg i8 %cond.in.i.3 to i32
+  %or.3 = or i32 %or.2, %conv26.3
+  ret i32 %or.3
+}
diff --git a/llvm/test/tools/llvm-ar/Inputs/.gitattributes b/llvm/test/tools/llvm-ar/Inputs/.gitattributes
deleted file mode 100644
index 6c8a26285daf..000000000000
--- a/llvm/test/tools/llvm-ar/Inputs/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-mri-crlf.mri text eol=crlf
diff --git a/llvm/test/tools/llvm-ar/Inputs/mri-crlf.mri b/llvm/test/tools/llvm-ar/Inputs/mri-crlf.mri
index 857c4ff87b6c..72d23d041ae8 100644
--- a/llvm/test/tools/llvm-ar/Inputs/mri-crlf.mri
+++ b/llvm/test/tools/llvm-ar/Inputs/mri-crlf.mri
@@ -1,4 +1,4 @@
-; this file intentionally has crlf line endings
-create crlf.a
-addmod foo.txt
-end
+; this file intentionally has crlf line endings
+create crlf.a
+addmod foo.txt
+end
diff --git a/llvm/test/tools/llvm-cvtres/Inputs/languages.rc b/llvm/test/tools/llvm-cvtres/Inputs/languages.rc
index 82031d0e2083..081b3a77bebc 100644
--- a/llvm/test/tools/llvm-cvtres/Inputs/languages.rc
+++ b/llvm/test/tools/llvm-cvtres/Inputs/languages.rc
@@ -1,36 +1,36 @@
-#include "windows.h"
-
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
-randomdat RCDATA
-{
-	"this is a random bit of data that means nothing\0",
-	0x23a9,
-	0x140e,
-	194292,
-}
-
-LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
-randomdat RCDATA
-{
-	"zhe4 shi4 yi1ge4 sui2ji1 de shu4ju4, zhe4 yi4wei4zhe shen2me\0",
-	0x23a9,
-	0x140e,
-	194292,
-}
-
-LANGUAGE LANG_GERMAN, SUBLANG_GERMAN_LUXEMBOURG
-randomdat RCDATA
-{
-	"Dies ist ein zufälliges Bit von Daten, die nichts bedeutet\0",
-	0x23a9,
-	0x140e,
-	194292,
-}
-
-LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
-myaccelerators ACCELERATORS
-{
-	"^C", 999, VIRTKEY, ALT
-	"D", 1100, VIRTKEY, CONTROL, SHIFT
-	"^R", 444, ASCII, NOINVERT
-}
+#include "windows.h"
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+randomdat RCDATA
+{
+	"this is a random bit of data that means nothing\0",
+	0x23a9,
+	0x140e,
+	194292,
+}
+
+LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
+randomdat RCDATA
+{
+	"zhe4 shi4 yi1ge4 sui2ji1 de shu4ju4, zhe4 yi4wei4zhe shen2me\0",
+	0x23a9,
+	0x140e,
+	194292,
+}
+
+LANGUAGE LANG_GERMAN, SUBLANG_GERMAN_LUXEMBOURG
+randomdat RCDATA
+{
+	"Dies ist ein zufälliges Bit von Daten, die nichts bedeutet\0",
+	0x23a9,
+	0x140e,
+	194292,
+}
+
+LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
+myaccelerators ACCELERATORS
+{
+	"^C", 999, VIRTKEY, ALT
+	"D", 1100, VIRTKEY, CONTROL, SHIFT
+	"^R", 444, ASCII, NOINVERT
+}
diff --git a/llvm/test/tools/llvm-cvtres/Inputs/test_resource.rc b/llvm/test/tools/llvm-cvtres/Inputs/test_resource.rc
index 494849f57a0a..5ca097baa0f7 100644
--- a/llvm/test/tools/llvm-cvtres/Inputs/test_resource.rc
+++ b/llvm/test/tools/llvm-cvtres/Inputs/test_resource.rc
@@ -1,50 +1,50 @@
-#include "windows.h"
-
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
-
-myaccelerators ACCELERATORS
-{
-	"^C", 999, VIRTKEY, ALT
-	"D", 1100, VIRTKEY, CONTROL, SHIFT
-	"^R", 444, ASCII, NOINVERT
-}
-
-cursor BITMAP "cursor_small.bmp"
-okay BITMAP "okay_small.bmp"
-
-14432 MENU
-LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
-{
-	MENUITEM "yu", 100
-	MENUITEM "shala", 101
-	MENUITEM "kaoya", 102
-}
-
-testdialog DIALOG 10, 10, 200, 300
-STYLE WS_POPUP | WS_BORDER
-CAPTION "Test"
-{
-	CTEXT "Continue:", 1, 10, 10, 230, 14
-	PUSHBUTTON "&OK", 2, 66, 134, 161, 13
-}
-
-12 ACCELERATORS
-{
-	"X", 164, VIRTKEY, ALT
-	"H", 5678, VIRTKEY, CONTROL, SHIFT
-	"^R", 444, ASCII, NOINVERT
-}
-
-"eat" MENU
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_AUS
-{
-	MENUITEM "fish", 100
-	MENUITEM "salad", 101
-	MENUITEM "duck", 102
-}
-
-
-myresource stringarray {
-	"this is a user defined resource\0",
-	"it contains many strings\0",
+#include "windows.h"
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+myaccelerators ACCELERATORS
+{
+	"^C", 999, VIRTKEY, ALT
+	"D", 1100, VIRTKEY, CONTROL, SHIFT
+	"^R", 444, ASCII, NOINVERT
+}
+
+cursor BITMAP "cursor_small.bmp"
+okay BITMAP "okay_small.bmp"
+
+14432 MENU
+LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
+{
+	MENUITEM "yu", 100
+	MENUITEM "shala", 101
+	MENUITEM "kaoya", 102
+}
+
+testdialog DIALOG 10, 10, 200, 300
+STYLE WS_POPUP | WS_BORDER
+CAPTION "Test"
+{
+	CTEXT "Continue:", 1, 10, 10, 230, 14
+	PUSHBUTTON "&OK", 2, 66, 134, 161, 13
+}
+
+12 ACCELERATORS
+{
+	"X", 164, VIRTKEY, ALT
+	"H", 5678, VIRTKEY, CONTROL, SHIFT
+	"^R", 444, ASCII, NOINVERT
+}
+
+"eat" MENU
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_AUS
+{
+	MENUITEM "fish", 100
+	MENUITEM "salad", 101
+	MENUITEM "duck", 102
+}
+
+
+myresource stringarray {
+	"this is a user defined resource\0",
+	"it contains many strings\0",
 }
 \ No newline at end of file
diff --git a/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc b/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
index c700b587af64..bb79dca399c2 100644
--- a/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
+++ b/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
@@ -1,16 +1,16 @@
-101 DIALOG 0, 0, 362, 246
-STYLE 0x40l | 0x0004l | 0x0008l | 0x0800l | 0x00020000l |
-    0x00010000l | 0x80000000l | 0x10000000l | 0x02000000l | 0x00C00000l |
-    0x00080000l | 0x00040000l
-CAPTION "MakeNSISW"
-MENU 104
-FONT 8, "MS Shell Dlg"
-BEGIN
-    CONTROL "",202,"RichEdit20A",0x0004l | 0x0040l |
-                    0x0100l | 0x0800l | 0x00008000 |
-                    0x00010000l | 0x00800000l | 0x00200000l,7,22,348,190
-    CONTROL "",-1,"Static",0x00000010l,7,220,346,1
-    LTEXT "",200,7,230,200,12,0x08000000l
-    DEFPUSHBUTTON "Test &Installer",203,230,226,60,15,0x08000000l | 0x00010000l
-    PUSHBUTTON "&Close",2,296,226,49,15,0x00010000l
-END
+101 DIALOG 0, 0, 362, 246
+STYLE 0x40l | 0x0004l | 0x0008l | 0x0800l | 0x00020000l |
+    0x00010000l | 0x80000000l | 0x10000000l | 0x02000000l | 0x00C00000l |
+    0x00080000l | 0x00040000l
+CAPTION "MakeNSISW"
+MENU 104
+FONT 8, "MS Shell Dlg"
+BEGIN
+    CONTROL "",202,"RichEdit20A",0x0004l | 0x0040l |
+                    0x0100l | 0x0800l | 0x00008000 |
+                    0x00010000l | 0x00800000l | 0x00200000l,7,22,348,190
+    CONTROL "",-1,"Static",0x00000010l,7,220,346,1
+    LTEXT "",200,7,230,200,12,0x08000000l
+    DEFPUSHBUTTON "Test &Installer",203,230,226,60,15,0x08000000l | 0x00010000l
+    PUSHBUTTON "&Close",2,296,226,49,15,0x00010000l
+END
diff --git a/llvm/test/tools/llvm-readobj/COFF/Inputs/resources/test_resource.rc b/llvm/test/tools/llvm-readobj/COFF/Inputs/resources/test_resource.rc
index 6ad56bc02d73..fd616520dbe1 100644
--- a/llvm/test/tools/llvm-readobj/COFF/Inputs/resources/test_resource.rc
+++ b/llvm/test/tools/llvm-readobj/COFF/Inputs/resources/test_resource.rc
@@ -1,44 +1,44 @@
-#include "windows.h"
-
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
-
-myaccelerators ACCELERATORS
-{
-	"^C", 999, VIRTKEY, ALT
-	"D", 1100, VIRTKEY, CONTROL, SHIFT
-	"^R", 444, ASCII, NOINVERT
-}
-
-cursor BITMAP "cursor_small.bmp"
-okay BITMAP "okay_small.bmp"
-
-14432 MENU
-LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
-{
-	MENUITEM "yu", 100
-	MENUITEM "shala", 101
-	MENUITEM "kaoya", 102
-}
-
-testdialog DIALOG 10, 10, 200, 300
-STYLE WS_POPUP | WS_BORDER
-CAPTION "Test"
-{
-	CTEXT "Continue:", 1, 10, 10, 230, 14
-	PUSHBUTTON "&OK", 2, 66, 134, 161, 13
-}
-
-12 ACCELERATORS
-{
-	"X", 164, VIRTKEY, ALT
-	"H", 5678, VIRTKEY, CONTROL, SHIFT
-	"^R", 444, ASCII, NOINVERT
-}
-
-"eat" MENU
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_AUS
-{
-	MENUITEM "fish", 100
-	MENUITEM "salad", 101
-	MENUITEM "duck", 102
-}
+#include "windows.h"
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+myaccelerators ACCELERATORS
+{
+	"^C", 999, VIRTKEY, ALT
+	"D", 1100, VIRTKEY, CONTROL, SHIFT
+	"^R", 444, ASCII, NOINVERT
+}
+
+cursor BITMAP "cursor_small.bmp"
+okay BITMAP "okay_small.bmp"
+
+14432 MENU
+LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED
+{
+	MENUITEM "yu", 100
+	MENUITEM "shala", 101
+	MENUITEM "kaoya", 102
+}
+
+testdialog DIALOG 10, 10, 200, 300
+STYLE WS_POPUP | WS_BORDER
+CAPTION "Test"
+{
+	CTEXT "Continue:", 1, 10, 10, 230, 14
+	PUSHBUTTON "&OK", 2, 66, 134, 161, 13
+}
+
+12 ACCELERATORS
+{
+	"X", 164, VIRTKEY, ALT
+	"H", 5678, VIRTKEY, CONTROL, SHIFT
+	"^R", 444, ASCII, NOINVERT
+}
+
+"eat" MENU
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_AUS
+{
+	MENUITEM "fish", 100
+	MENUITEM "salad", 101
+	MENUITEM "duck", 102
+}
diff --git a/llvm/unittests/Support/ModRefTest.cpp b/llvm/unittests/Support/ModRefTest.cpp
index f77e7e39e14e..35107e50b32d 100644
--- a/llvm/unittests/Support/ModRefTest.cpp
+++ b/llvm/unittests/Support/ModRefTest.cpp
@@ -1,27 +1,27 @@
-//===- llvm/unittest/Support/ModRefTest.cpp - ModRef tests ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/ModRef.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
-#include "gtest/gtest.h"
-#include <string>
-
-using namespace llvm;
-
-namespace {
-
-// Verify that printing a MemoryEffects does not end with a ,.
-TEST(ModRefTest, PrintMemoryEffects) {
-  std::string S;
-  raw_string_ostream OS(S);
-  OS << MemoryEffects::none();
-  EXPECT_EQ(S, "ArgMem: NoModRef, InaccessibleMem: NoModRef, Other: NoModRef");
-}
-
-} // namespace
+//===- llvm/unittest/Support/ModRefTest.cpp - ModRef tests ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ModRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+// Verify that printing a MemoryEffects does not end with a ,.
+TEST(ModRefTest, PrintMemoryEffects) {
+  std::string S;
+  raw_string_ostream OS(S);
+  OS << MemoryEffects::none();
+  EXPECT_EQ(S, "ArgMem: NoModRef, InaccessibleMem: NoModRef, Other: NoModRef");
+}
+
+} // namespace
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
index dcd7232db5f6..24512cb0225e 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -11,5 +11,6 @@ add_llvm_unittest(SandboxVectorizerTests
   DependencyGraphTest.cpp
   IntervalTest.cpp
   LegalityTest.cpp
+  SchedulerTest.cpp
   SeedCollectorTest.cpp	
 )
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/DependencyGraphTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/DependencyGraphTest.cpp
index 3f84ad1f731d..061d57c31ce2 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/DependencyGraphTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/DependencyGraphTest.cpp
@@ -254,6 +254,18 @@ define void @foo(ptr %ptr, i8 %v0, i8 %v1) {
   EXPECT_EQ(N0->getNumUnscheduledSuccs(), 1u); // N1
   EXPECT_EQ(N1->getNumUnscheduledSuccs(), 0u);
   EXPECT_EQ(N2->getNumUnscheduledSuccs(), 0u);
+
+  // Check decrUnscheduledSuccs.
+  N0->decrUnscheduledSuccs();
+  EXPECT_EQ(N0->getNumUnscheduledSuccs(), 0u);
+#ifndef NDEBUG
+  EXPECT_DEATH(N0->decrUnscheduledSuccs(), ".*Counting.*");
+#endif // NDEBUG
+
+  // Check scheduled(), setScheduled().
+  EXPECT_FALSE(N0->scheduled());
+  N0->setScheduled(true);
+  EXPECT_TRUE(N0->scheduled());
 }
 
 TEST_F(DependencyGraphTest, Preds) {
@@ -773,4 +785,16 @@ define void @foo(ptr %ptr, i8 %v1, i8 %v2, i8 %v3, i8 %v4, i8 %v5) {
     EXPECT_EQ(S4N->getNumUnscheduledSuccs(), 1u); // S5N
     EXPECT_EQ(S5N->getNumUnscheduledSuccs(), 0u);
   }
+
+  {
+    // Check UnscheduledSuccs when a node is scheduled
+    sandboxir::DependencyGraph DAG(getAA(*LLVMF));
+    DAG.extend({S2, S2});
+    auto *S2N = cast<sandboxir::MemDGNode>(DAG.getNode(S2));
+    S2N->setScheduled(true);
+
+    DAG.extend({S1, S1});
+    auto *S1N = cast<sandboxir::MemDGNode>(DAG.getNode(S1));
+    EXPECT_EQ(S1N->getNumUnscheduledSuccs(), 0u); // S1 is scheduled
+  }
 }
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
new file mode 100644
index 000000000000..92e767e55fbd
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp
@@ -0,0 +1,204 @@
+//===- SchedulerTest.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/SandboxIR/Context.h"
+#include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock-matchers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+struct SchedulerTest : public testing::Test {
+  LLVMContext C;
+  std::unique_ptr<Module> M;
+  std::unique_ptr<AssumptionCache> AC;
+  std::unique_ptr<DominatorTree> DT;
+  std::unique_ptr<BasicAAResult> BAA;
+  std::unique_ptr<AAResults> AA;
+
+  void parseIR(LLVMContext &C, const char *IR) {
+    SMDiagnostic Err;
+    M = parseAssemblyString(IR, Err, C);
+    if (!M)
+      Err.print("SchedulerTest", errs());
+  }
+
+  AAResults &getAA(llvm::Function &LLVMF) {
+    TargetLibraryInfoImpl TLII;
+    TargetLibraryInfo TLI(TLII);
+    AA = std::make_unique<AAResults>(TLI);
+    AC = std::make_unique<AssumptionCache>(LLVMF);
+    DT = std::make_unique<DominatorTree>(LLVMF);
+    BAA = std::make_unique<BasicAAResult>(M->getDataLayout(), LLVMF, TLI, *AC,
+                                          DT.get());
+    AA->addAAResult(*BAA);
+    return *AA;
+  }
+};
+
+TEST_F(SchedulerTest, SchedBundle) {
+  parseIR(C, R"IR(
+define void @foo(ptr %ptr, i8 %v0, i8 %v1) {
+  store i8 %v0, ptr %ptr
+  %other = add i8 %v0, %v1
+  store i8 %v1, ptr %ptr
+  ret void
+}
+)IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto *F = Ctx.createFunction(LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+  auto *Other = &*It++;
+  auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+  auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
+
+  sandboxir::DependencyGraph DAG(getAA(*LLVMF));
+  DAG.extend({&*BB->begin(), BB->getTerminator()});
+  auto *SN0 = DAG.getNode(S0);
+  auto *SN1 = DAG.getNode(S1);
+  sandboxir::SchedBundle Bndl({SN0, SN1});
+
+  // Check getTop().
+  EXPECT_EQ(Bndl.getTop(), SN0);
+  // Check getBot().
+  EXPECT_EQ(Bndl.getBot(), SN1);
+  // Check cluster().
+  Bndl.cluster(S1->getIterator());
+  {
+    auto It = BB->begin();
+    EXPECT_EQ(&*It++, Other);
+    EXPECT_EQ(&*It++, S0);
+    EXPECT_EQ(&*It++, S1);
+    EXPECT_EQ(&*It++, Ret);
+    S0->moveBefore(Other);
+  }
+
+  Bndl.cluster(S0->getIterator());
+  {
+    auto It = BB->begin();
+    EXPECT_EQ(&*It++, S0);
+    EXPECT_EQ(&*It++, S1);
+    EXPECT_EQ(&*It++, Other);
+    EXPECT_EQ(&*It++, Ret);
+    S1->moveAfter(Other);
+  }
+
+  Bndl.cluster(Other->getIterator());
+  {
+    auto It = BB->begin();
+    EXPECT_EQ(&*It++, S0);
+    EXPECT_EQ(&*It++, S1);
+    EXPECT_EQ(&*It++, Other);
+    EXPECT_EQ(&*It++, Ret);
+    S1->moveAfter(Other);
+  }
+
+  Bndl.cluster(Ret->getIterator());
+  {
+    auto It = BB->begin();
+    EXPECT_EQ(&*It++, Other);
+    EXPECT_EQ(&*It++, S0);
+    EXPECT_EQ(&*It++, S1);
+    EXPECT_EQ(&*It++, Ret);
+    Other->moveBefore(S1);
+  }
+
+  Bndl.cluster(BB->end());
+  {
+    auto It = BB->begin();
+    EXPECT_EQ(&*It++, Other);
+    EXPECT_EQ(&*It++, Ret);
+    EXPECT_EQ(&*It++, S0);
+    EXPECT_EQ(&*It++, S1);
+    Ret->moveAfter(S1);
+    Other->moveAfter(S0);
+  }
+  // Check iterators.
+  EXPECT_THAT(Bndl, testing::ElementsAre(SN0, SN1));
+  EXPECT_THAT((const sandboxir::SchedBundle &)Bndl,
+              testing::ElementsAre(SN0, SN1));
+}
+
+TEST_F(SchedulerTest, Basic) {
+  parseIR(C, R"IR(
+define void @foo(ptr %ptr, i8 %v0, i8 %v1) {
+  store i8 %v0, ptr %ptr
+  store i8 %v1, ptr %ptr
+  ret void
+}
+)IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto *F = Ctx.createFunction(LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+  auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+  auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
+
+  {
+    // Schedule all instructions in sequence.
+    sandboxir::Scheduler Sched(getAA(*LLVMF));
+    EXPECT_TRUE(Sched.trySchedule({Ret}));
+    EXPECT_TRUE(Sched.trySchedule({S1}));
+    EXPECT_TRUE(Sched.trySchedule({S0}));
+  }
+  {
+    // Skip instructions.
+    sandboxir::Scheduler Sched(getAA(*LLVMF));
+    EXPECT_TRUE(Sched.trySchedule({Ret}));
+    EXPECT_TRUE(Sched.trySchedule({S0}));
+  }
+  {
+    // Try invalid scheduling
+    sandboxir::Scheduler Sched(getAA(*LLVMF));
+    EXPECT_TRUE(Sched.trySchedule({Ret}));
+    EXPECT_TRUE(Sched.trySchedule({S0}));
+    EXPECT_FALSE(Sched.trySchedule({S1}));
+  }
+}
+
+TEST_F(SchedulerTest, Bundles) {
+  parseIR(C, R"IR(
+define void @foo(ptr noalias %ptr0, ptr noalias %ptr1) {
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i8, ptr %ptr1
+  store i8 %ld0, ptr %ptr0
+  store i8 %ld1, ptr %ptr1
+  ret void
+}
+)IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto *F = Ctx.createFunction(LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  auto *L0 = cast<sandboxir::LoadInst>(&*It++);
+  auto *L1 = cast<sandboxir::LoadInst>(&*It++);
+  auto *S0 = cast<sandboxir::StoreInst>(&*It++);
+  auto *S1 = cast<sandboxir::StoreInst>(&*It++);
+  auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
+
+  sandboxir::Scheduler Sched(getAA(*LLVMF));
+  EXPECT_TRUE(Sched.trySchedule({Ret}));
+  EXPECT_TRUE(Sched.trySchedule({S0, S1}));
+  EXPECT_TRUE(Sched.trySchedule({L0, L1}));
+}
diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index 03ca2d33a80b..d83ae8013c51 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -1,408 +1,408 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-Visual Studio Native Debugging Visualizers for LLVM
-
-For Visual Studio 2013 only, put this file into
-"%USERPROFILE%\Documents\Visual Studio 2013\Visualizers" or create a symbolic link so it updates automatically.
-
-For later versions of Visual Studio, no setup is required.
--->
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-  <Type Name="llvm::SmallVectorImpl&lt;*&gt;">
-    <DisplayString Condition="Size == 0">empty</DisplayString>
-    <DisplayString Condition="Size &amp;&amp; Size &lt; 4">{(value_type*)BeginX,[Size]}</DisplayString>
-    <DisplayString Condition="Size &gt; 3">{Size} elements</DisplayString>
-    <DisplayString>Uninitialized</DisplayString>
-    <Expand>
-      <Item Name="[size]">Size</Item>
-      <Item Name="[capacity]">Capacity</Item>
-      <ArrayItems>
-        <Size>Size</Size>
-        <ValuePointer>(value_type*)BeginX</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-  <Type Name="llvm::APInt">
-    <!-- For now, only handle up to 64-bit unsigned ints -->
-    <DisplayString Condition="BitWidth &lt;= 64">{U.VAL}</DisplayString>
-    <DisplayString>Cannot visualize APInts longer than 64 bits</DisplayString>
-  </Type>
-  <Type Name="llvm::ArrayRef&lt;*&gt;">
-    <DisplayString Condition="Length &lt; 4">{Data,[Length]}</DisplayString>
-    <DisplayString Condition="Length &gt; 3">{Length} elements</DisplayString>
-    <DisplayString>Uninitialized</DisplayString>
-    <Expand>
-      <Item Name="[size]">Length</Item>
-      <ArrayItems>
-        <Size>Length</Size>
-        <ValuePointer>Data</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-  <Type Name="llvm::SmallString&lt;*&gt;">
-    <DisplayString>{(const char*)BeginX,[Size]s8}</DisplayString>
-    <StringView>(const char*)BeginX,[Size]</StringView>
-    <Expand>
-      <Item Name="[size]">Size</Item>
-      <Item Name="[capacity]">Capacity</Item>
-      <ArrayItems>
-        <Size>Size</Size>
-        <ValuePointer>(char*)BeginX</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="StringView">
-    <DisplayString>{First,[Last - First]s8}</DisplayString>
-  </Type>
-
-  <Type Name="llvm::StringRef">
-    <DisplayString>{Data,[Length]s8}</DisplayString>
-    <StringView>Data,[Length]s8</StringView>
-    <Expand>
-      <Item Name="[size]">Length</Item>
-      <ArrayItems>
-        <Size>Length</Size>
-        <ValuePointer>Data</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::PunnedPointer">
-    <DisplayString>{($T1)*(intptr_t *)Data}</DisplayString>
-  </Type>
-
-  <!-- PointerIntPair. In addition to the regular view, it is possible to view
-       just the pointer or just the int. The same code is duplicated from the
-       regular viewer to improve the performance of the common case. Note, we
-       need to specify PointerIntPair<PointerUnion<*>, *> differently because
-       we need to "look through" the PointerUnion to display it. Otherwise, we
-       get errors about ambiguous conversion from uintptr_t to PointerUnion.-->
-  <Type Name="llvm::PointerIntPair&lt;llvm::PointerUnion&lt;*&gt;, *&gt;">
-    <!-- $T1 is the parameter pack of PointerUnion, $T3 is IntBits,
-         $T4 is IntType, $T5 is PtrTraits, and $T6 is Info. -->
-    <DisplayString IncludeView="ptr">{($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)}</DisplayString>
-    <DisplayString IncludeView="int">{($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)}</DisplayString>
-    <DisplayString>{$T6::IntMask}: {($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)} [{($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)}]</DisplayString>
-    <Expand>
-      <Item Name="[ptr]">($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)</Item>
-      <Item Name="[int]">($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::PointerIntPair&lt;*&gt;">
-    <DisplayString IncludeView="ptr">{($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)}</DisplayString>
-    <DisplayString IncludeView="int">{((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)}</DisplayString>
-    <DisplayString>{$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)} [{((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)}]</DisplayString>
-    <Expand>
-      <Item Name="[ptr]">($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)</Item>
-      <Item Name="[int]">((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)</Item>
-    </Expand>
-  </Type>
-  <!-- PointerUnion types -->
-  <Type Name="llvm::pointer_union_detail::PointerUnionMembers&lt;*&gt;">
-    <DisplayString Optional="true" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
-      {($T4)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)}
-    </DisplayString>
-    <DisplayString Optional="true" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
-      {($T5)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)}
-    </DisplayString>
-    <DisplayString>Unexpected index in PointerUnion: {(*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask}</DisplayString>
-    <Expand>
-      <Item Name="[Holds]" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">"$T4",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
-        ($T4)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)
-      </Item>
-      <Item Name="[Holds]" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">"$T5",s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
-        ($T5)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)
-      </Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::iplist&lt;*,*&gt;">
-    <DisplayString Condition="Head == 0">{{ empty }}</DisplayString>
-    <DisplayString Condition="Head != 0">{{ head={Head} }}</DisplayString>
-    <Expand>
-      <LinkedListItems>
-        <HeadPointer>Head</HeadPointer>
-        <NextPointer>Next</NextPointer>
-        <ValueNode>this</ValueNode>
-      </LinkedListItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::IntrusiveRefCntPtr&lt;*&gt;">
-    <DisplayString Condition="Obj == 0">empty</DisplayString>
-    <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;RefCount == 1)">RefPtr [1 ref] {*Obj}</DisplayString>
-    <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;RefCount != 1)">RefPtr [{Obj-&gt;RefCount} refs] {*Obj}</DisplayString>
-    <Expand>
-      <Item Condition="Obj != 0" Name="[refs]">Obj-&gt;RefCount</Item>
-      <ExpandedItem Condition="Obj != 0">Obj</ExpandedItem>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::SmallPtrSet&lt;*,*&gt;">
-    <DisplayString Condition="CurArray == SmallArray">{{ [Small Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
-    <DisplayString Condition="CurArray != SmallArray">{{ [Big Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
-    <Expand>
-      <Item Name="[size]">NumNonEmpty</Item>
-      <Item Name="[capacity]">CurArraySize</Item>
-      <ArrayItems>
-        <Size>NumNonEmpty</Size>
-        <ValuePointer>($T1*)CurArray</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::DenseMap&lt;*,*,*&gt;">
-    <DisplayString Condition="NumEntries == 0">empty</DisplayString>
-    <DisplayString Condition="NumEntries != 0">{{ size={NumEntries}, buckets={NumBuckets} }}</DisplayString>
-    <Expand>
-      <Item Name="[size]">NumEntries</Item>
-      <Item Name="[buckets]">NumBuckets</Item>
-      <ArrayItems>
-        <Size>NumBuckets</Size>
-        <ValuePointer>Buckets</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::StringMap&lt;*,*&gt;">
-    <DisplayString>{{ size={NumItems}, buckets={NumBuckets} }}</DisplayString>
-    <Expand>
-      <Item Name="[size]">NumItems</Item>
-      <Item Name="[buckets]">NumBuckets</Item>
-      <ArrayItems>
-        <Size>NumBuckets</Size>
-        <ValuePointer>(MapEntryTy**)TheTable</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::StringMapEntry&lt;*&gt;">
-    <DisplayString Condition="keyLength == 0">empty</DisplayString>
-    <DisplayString Condition="keyLength != 0">({this+1,s8}, {second})</DisplayString>
-    <Expand>
-      <Item Name="[key]">this+1,s</Item>
-      <Item Name="[value]" Condition="keyLength != 0">second</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::Triple">
-    <DisplayString>{Data}</DisplayString>
-  </Type>
-
-  <Type Name="llvm::Optional&lt;*&gt;">
-    <DisplayString Condition="!Storage.hasVal">None</DisplayString>
-    <DisplayString Condition="Storage.hasVal">{Storage.value}</DisplayString>
-    <Expand>
-      <Item Name="[underlying]" Condition="Storage.hasVal">Storage.value</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::Expected&lt;*&gt;">
-    <DisplayString Condition="HasError">Error</DisplayString>
-    <DisplayString Condition="!HasError">{*((storage_type *)TStorage.buffer)}</DisplayString>
-    <Expand>
-      <Item Name="[value]" Condition="!HasError">*((storage_type *)TStorage.buffer)</Item>
-      <Item Name="[error]" Condition="HasError">*((error_type *)ErrorStorage.buffer)</Item>
-    </Expand>
-  </Type>
-
-
-  <!-- Since we're in MSVC, we can assume that the system is little endian.  Therefore
-       the little and native cases just require a cast.  Handle this easy case first. Use
-       a wildcard for the second template argument (the endianness), but we will use a
-       specific value of 0 later on for the big endian to give it priority for being a
-       better match.  -->
-  <Type Name="llvm::support::detail::packed_endian_specific_integral&lt;*,*,1&gt;">
-    <DisplayString>{{little endian value = {*(($T1*)(unsigned char *)Value.buffer)} }}</DisplayString>
-    <Expand>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==1">(unsigned char *)Value.buffer,1</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==2">(unsigned char *)Value.buffer,2</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==4">(unsigned char *)Value.buffer,4</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==8">(unsigned char *)Value.buffer,8</Item>
-    </Expand>
-  </Type>
-
-  <!-- Now handle the hard case of big endian.  We need to do the swizzling here, but
-       we need to specialize it based on the size of the value type. -->
-  <Type Name="llvm::support::detail::packed_endian_specific_integral&lt;*,0,1&gt;">
-    <DisplayString Condition="sizeof($T1)==1">{{ big endian value = {*(unsigned char *)Value.buffer} }}</DisplayString>
-    <DisplayString Condition="sizeof($T1)==2">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 8)
-                                                                    | ($T1)(*((unsigned char *)Value.buffer+1))} }}</DisplayString>
-    <DisplayString Condition="sizeof($T1)==4">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 24)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+1)) &lt;&lt; 16)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+2)) &lt;&lt; 8)
-                                                                    |  ($T1)(*((unsigned char *)Value.buffer+3))} }}</DisplayString>
-    <DisplayString Condition="sizeof($T1)==8">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 56)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+1)) &lt;&lt; 48)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+2)) &lt;&lt; 40)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+3)) &lt;&lt; 32)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+4)) &lt;&lt; 24)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+5)) &lt;&lt; 16)
-                                                                    | (($T1)(*((unsigned char *)Value.buffer+6)) &lt;&lt; 8)
-                                                                    |  ($T1)(*((unsigned char *)Value.buffer+7))} }}</DisplayString>
-    <Expand>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==1">(unsigned char *)Value.buffer,1</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==2">(unsigned char *)Value.buffer,2</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==4">(unsigned char *)Value.buffer,4</Item>
-      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==8">(unsigned char *)Value.buffer,8</Item>
-    </Expand>
-  </Type>
-  <!-- llvm::Type has two fields, SubclassData and ContainedTys, the meaning of which change depending on the TypeID.
-       This visualiser decodes those fields based on the value of ID.
-  -->
-  <Type Name="llvm::Type">
-    <DisplayString>{ID}</DisplayString>
-    <Expand>
-      <Item Name="ID">ID</Item>
-
-      <Item Name="NumBits" Condition="ID == llvm::Type::TypeID::IntegerTyID">SubclassData</Item>
-
-      <Item Name="ReturnType" Condition="ID == llvm::Type::TypeID::FunctionTyID">*ContainedTys</Item>
-      <Synthetic Name="Arguments" Condition="ID == llvm::Type::TypeID::FunctionTyID">
-        <DisplayString>{NumContainedTys - 1}</DisplayString>
-        <Expand>
-        <ArrayItems>
-          <Size>NumContainedTys - 1</Size>
-          <ValuePointer>ContainedTys + 1</ValuePointer>
-        </ArrayItems>
-        </Expand>
-      </Synthetic>
-      <Item Name="IsVarArg" Condition="ID == llvm::Type::TypeID::FunctionTyID">SubclassData == 1</Item>
-
-      <Item Name="HasBody" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_HasBody) != 0</Item>
-      <Item Name="Packed" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_Packed) != 0</Item>
-      <Item Name="IsLiteral" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_IsLiteral) != 0</Item>
-      <Item Name="IsSized" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_IsSized) != 0</Item>
-      <Synthetic Name="Members" Condition="ID == llvm::Type::TypeID::StructTyID">
-        <DisplayString>{NumContainedTys}</DisplayString>
-        <Expand>
-          <ArrayItems>
-            <Size>NumContainedTys</Size>
-            <ValuePointer>ContainedTys</ValuePointer>
-          </ArrayItems>
-        </Expand>
-      </Synthetic>
-
-      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::ArrayTyID">*ContainedTys</Item>
-      <Item Name="NumElements" Condition="ID == llvm::Type::TypeID::ArrayTyID">((llvm::ArrayType*)this)->NumElements</Item>
-
-      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::FixedVectorTyID">*ContainedTys</Item>
-      <Item Name="NumElements" Condition="ID == llvm::Type::TypeID::FixedVectorTyID">((llvm::VectorType*)this)->ElementQuantity</Item>
-
-      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::ScalableVectorTyID">*ContainedTys</Item>
-      <Item Name="MinNumElements" Condition="ID == llvm::Type::TypeID::ScalableVectorTyID">((llvm::VectorType*)this)->ElementQuantity</Item>
-
-      <Item Name="AddressSpace" Condition="ID == llvm::Type::TypeID::PointerTyID">SubclassData</Item>
-      <Item Name="PointeeType" Condition="ID == llvm::Type::TypeID::PointerTyID">*ContainedTys</Item>
-
-      <Item Name="Context">Context</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::ConstantSDNode">
-    <DisplayString>$(Type) {*Value}</DisplayString>
-  </Type>
-
-  <Type Name="llvm::SDNode">
-    <DisplayString>$(Type) {(llvm::ISD::NodeType)this->NodeType}</DisplayString>
-    <Expand>
-      <ArrayItems>
-        <Size>NumOperands</Size>
-        <ValuePointer>OperandList</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::ConstantInt">
-    <DisplayString>i{Val.BitWidth} {Val.VAL}</DisplayString>
-  </Type>
-
-  <Type Name="llvm::IntegerType">
-    <DisplayString>{IDAndSubclassData >> 8}bit integer type</DisplayString>
-  </Type>
-
-  <Type Name="llvm::Value">
-    <DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
-    <DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
-    <Expand>
-      <Item Name="[Inst]" Condition="SubclassID > InstructionVal">(Instruction*)this</Item>
-      <Item Name="Operands">(User*)this</Item>
-      <LinkedListItems>
-        <HeadPointer>UseList</HeadPointer>
-        <NextPointer>Next</NextPointer>
-        <ValueNode>Prev.Value &amp; 3 == 3 ? (User*)(this + 1) : (User*)(this + 2)</ValueNode>
-      </LinkedListItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::Use">
-    <Expand>
-      <Item Name="Value">Val</Item>
-      <!--
-      <LinkedListItems>
-        <HeadPointer>this</HeadPointer>
-        <NextPointer>Next</NextPointer>
-        <ValueNode>Prev.Value &amp; 3 == 3 ? (User*)(this + 1) : (User*)(this + 2)</ValueNode>
-      </LinkedListItems>
-      -->
-    </Expand>
-  </Type>
-
-  <!-- uses other values, like Operands -->
-  <Type Name="llvm::User">
-    <DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
-    <DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
-    <Expand>
-      <Item Name="[Value]">(Value*)this,nd</Item>
-      <Item Name="[Type]">*VTy</Item>
-      <ArrayItems Condition="!HasHungOffUses">
-        <Size>NumUserOperands</Size>
-        <ValuePointer>(llvm::Use*)this - NumUserOperands</ValuePointer>
-      </ArrayItems>
-      <ArrayItems Condition="HasHungOffUses">
-        <Size>NumUserOperands</Size>
-        <ValuePointer>*((llvm::Use**)this - 1)</ValuePointer>
-      </ArrayItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::Instruction">
-    <DisplayString>{getOpcodeName(SubclassID - InstructionVal)}</DisplayString>
-    <Expand>
-      <Item Name="[User]">(User*)this,nd</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::GlobalValue">
-    <DisplayString>{this->getName()} {(LinkageTypes)Linkage} {(VisibilityTypes)Visibility} {(DLLStorageClassTypes)DllStorageClass} {(llvm::GlobalValue::ThreadLocalMode) ThreadLocal}</DisplayString>
-  </Type>
-
-  <!-- TODO doesn't work cause it doesn't know the dynamic type -->
-  <Type Name="llvm::ilist_node">
-    <Expand>
-      <LinkedListItems>
-        <HeadPointer>this</HeadPointer>
-        <NextPointer>Next</NextPointer>
-        <ValueNode>this</ValueNode>
-      </LinkedListItems>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::LLVMContext">
-    <Expand>
-      <ExpandedItem>pImpl</ExpandedItem>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::Module">
-    <DisplayString>{ModuleID,s8} {TargetTriple}</DisplayString>
-  </Type>
-
-  <Type Name="llvm::Pass">
-    <DisplayString>$(Type) {PassID} {Kind}</DisplayString>
-  </Type>
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+Visual Studio Native Debugging Visualizers for LLVM
+
+For Visual Studio 2013 only, put this file into
+"%USERPROFILE%\Documents\Visual Studio 2013\Visualizers" or create a symbolic link so it updates automatically.
+
+For later versions of Visual Studio, no setup is required.
+-->
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+  <Type Name="llvm::SmallVectorImpl&lt;*&gt;">
+    <DisplayString Condition="Size == 0">empty</DisplayString>
+    <DisplayString Condition="Size &amp;&amp; Size &lt; 4">{(value_type*)BeginX,[Size]}</DisplayString>
+    <DisplayString Condition="Size &gt; 3">{Size} elements</DisplayString>
+    <DisplayString>Uninitialized</DisplayString>
+    <Expand>
+      <Item Name="[size]">Size</Item>
+      <Item Name="[capacity]">Capacity</Item>
+      <ArrayItems>
+        <Size>Size</Size>
+        <ValuePointer>(value_type*)BeginX</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+  <Type Name="llvm::APInt">
+    <!-- For now, only handle up to 64-bit unsigned ints -->
+    <DisplayString Condition="BitWidth &lt;= 64">{U.VAL}</DisplayString>
+    <DisplayString>Cannot visualize APInts longer than 64 bits</DisplayString>
+  </Type>
+  <Type Name="llvm::ArrayRef&lt;*&gt;">
+    <DisplayString Condition="Length &lt; 4">{Data,[Length]}</DisplayString>
+    <DisplayString Condition="Length &gt; 3">{Length} elements</DisplayString>
+    <DisplayString>Uninitialized</DisplayString>
+    <Expand>
+      <Item Name="[size]">Length</Item>
+      <ArrayItems>
+        <Size>Length</Size>
+        <ValuePointer>Data</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+  <Type Name="llvm::SmallString&lt;*&gt;">
+    <DisplayString>{(const char*)BeginX,[Size]s8}</DisplayString>
+    <StringView>(const char*)BeginX,[Size]</StringView>
+    <Expand>
+      <Item Name="[size]">Size</Item>
+      <Item Name="[capacity]">Capacity</Item>
+      <ArrayItems>
+        <Size>Size</Size>
+        <ValuePointer>(char*)BeginX</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="StringView">
+    <DisplayString>{First,[Last - First]s8}</DisplayString>
+  </Type>
+
+  <Type Name="llvm::StringRef">
+    <DisplayString>{Data,[Length]s8}</DisplayString>
+    <StringView>Data,[Length]s8</StringView>
+    <Expand>
+      <Item Name="[size]">Length</Item>
+      <ArrayItems>
+        <Size>Length</Size>
+        <ValuePointer>Data</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::PunnedPointer">
+    <DisplayString>{($T1)*(intptr_t *)Data}</DisplayString>
+  </Type>
+
+  <!-- PointerIntPair. In addition to the regular view, it is possible to view
+       just the pointer or just the int. The same code is duplicated from the
+       regular viewer to improve the performance of the common case. Note, we
+       need to specify PointerIntPair<PointerUnion<*>, *> differently because
+       we need to "look through" the PointerUnion to display it. Otherwise, we
+       get errors about ambiguous conversion from uintptr_t to PointerUnion.-->
+  <Type Name="llvm::PointerIntPair&lt;llvm::PointerUnion&lt;*&gt;, *&gt;">
+    <!-- $T1 is the parameter pack of PointerUnion, $T3 is IntBits,
+         $T4 is IntType, $T5 is PtrTraits, and $T6 is Info. -->
+    <DisplayString IncludeView="ptr">{($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)}</DisplayString>
+    <DisplayString IncludeView="int">{($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)}</DisplayString>
+    <DisplayString>{$T6::IntMask}: {($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)} [{($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)}]</DisplayString>
+    <Expand>
+      <Item Name="[ptr]">($T1)(*(intptr_t *)Value.Data &amp; $T6::PointerBitMask)</Item>
+      <Item Name="[int]">($T4)((*(intptr_t *)Value.Data &gt;&gt; $T6::IntShift) &amp; $T6::IntMask)</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::PointerIntPair&lt;*&gt;">
+    <DisplayString IncludeView="ptr">{($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)}</DisplayString>
+    <DisplayString IncludeView="int">{((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)}</DisplayString>
+    <DisplayString>{$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)} [{((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)}]</DisplayString>
+    <Expand>
+      <Item Name="[ptr]">($T1)(*(intptr_t *)Value.Data &amp; $T5::PointerBitMask)</Item>
+      <Item Name="[int]">((*(intptr_t *)Value.Data &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)</Item>
+    </Expand>
+  </Type>
+  <!-- PointerUnion types -->
+  <Type Name="llvm::pointer_union_detail::PointerUnionMembers&lt;*&gt;">
+    <DisplayString Optional="true" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
+      {($T4)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)}
+    </DisplayString>
+    <DisplayString Optional="true" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
+      {($T5)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)}
+    </DisplayString>
+    <DisplayString>Unexpected index in PointerUnion: {(*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask}</DisplayString>
+    <Expand>
+      <Item Name="[Holds]" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">"$T4",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
+        ($T4)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)
+      </Item>
+      <Item Name="[Holds]" Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">"$T5",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((*(intptr_t *)Val.Value.Data&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
+        ($T5)(*(intptr_t *)Val.Value.Data &amp; $T2::InfoTy::PointerBitMask)
+      </Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::iplist&lt;*,*&gt;">
+    <DisplayString Condition="Head == 0">{{ empty }}</DisplayString>
+    <DisplayString Condition="Head != 0">{{ head={Head} }}</DisplayString>
+    <Expand>
+      <LinkedListItems>
+        <HeadPointer>Head</HeadPointer>
+        <NextPointer>Next</NextPointer>
+        <ValueNode>this</ValueNode>
+      </LinkedListItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::IntrusiveRefCntPtr&lt;*&gt;">
+    <DisplayString Condition="Obj == 0">empty</DisplayString>
+    <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;RefCount == 1)">RefPtr [1 ref] {*Obj}</DisplayString>
+    <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;RefCount != 1)">RefPtr [{Obj-&gt;RefCount} refs] {*Obj}</DisplayString>
+    <Expand>
+      <Item Condition="Obj != 0" Name="[refs]">Obj-&gt;RefCount</Item>
+      <ExpandedItem Condition="Obj != 0">Obj</ExpandedItem>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::SmallPtrSet&lt;*,*&gt;">
+    <DisplayString Condition="CurArray == SmallArray">{{ [Small Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
+    <DisplayString Condition="CurArray != SmallArray">{{ [Big Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
+    <Expand>
+      <Item Name="[size]">NumNonEmpty</Item>
+      <Item Name="[capacity]">CurArraySize</Item>
+      <ArrayItems>
+        <Size>NumNonEmpty</Size>
+        <ValuePointer>($T1*)CurArray</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::DenseMap&lt;*,*,*&gt;">
+    <DisplayString Condition="NumEntries == 0">empty</DisplayString>
+    <DisplayString Condition="NumEntries != 0">{{ size={NumEntries}, buckets={NumBuckets} }}</DisplayString>
+    <Expand>
+      <Item Name="[size]">NumEntries</Item>
+      <Item Name="[buckets]">NumBuckets</Item>
+      <ArrayItems>
+        <Size>NumBuckets</Size>
+        <ValuePointer>Buckets</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::StringMap&lt;*,*&gt;">
+    <DisplayString>{{ size={NumItems}, buckets={NumBuckets} }}</DisplayString>
+    <Expand>
+      <Item Name="[size]">NumItems</Item>
+      <Item Name="[buckets]">NumBuckets</Item>
+      <ArrayItems>
+        <Size>NumBuckets</Size>
+        <ValuePointer>(MapEntryTy**)TheTable</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::StringMapEntry&lt;*&gt;">
+    <DisplayString Condition="keyLength == 0">empty</DisplayString>
+    <DisplayString Condition="keyLength != 0">({this+1,s8}, {second})</DisplayString>
+    <Expand>
+      <Item Name="[key]">this+1,s</Item>
+      <Item Name="[value]" Condition="keyLength != 0">second</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Triple">
+    <DisplayString>{Data}</DisplayString>
+  </Type>
+
+  <Type Name="llvm::Optional&lt;*&gt;">
+    <DisplayString Condition="!Storage.hasVal">None</DisplayString>
+    <DisplayString Condition="Storage.hasVal">{Storage.value}</DisplayString>
+    <Expand>
+      <Item Name="[underlying]" Condition="Storage.hasVal">Storage.value</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Expected&lt;*&gt;">
+    <DisplayString Condition="HasError">Error</DisplayString>
+    <DisplayString Condition="!HasError">{*((storage_type *)TStorage.buffer)}</DisplayString>
+    <Expand>
+      <Item Name="[value]" Condition="!HasError">*((storage_type *)TStorage.buffer)</Item>
+      <Item Name="[error]" Condition="HasError">*((error_type *)ErrorStorage.buffer)</Item>
+    </Expand>
+  </Type>
+
+
+  <!-- Since we're in MSVC, we can assume that the system is little endian.  Therefore
+       the little and native cases just require a cast.  Handle this easy case first. Use
+       a wildcard for the second template argument (the endianness), but we will use a
+       specific value of 0 later on for the big endian to give it priority for being a
+       better match.  -->
+  <Type Name="llvm::support::detail::packed_endian_specific_integral&lt;*,*,1&gt;">
+    <DisplayString>{{little endian value = {*(($T1*)(unsigned char *)Value.buffer)} }}</DisplayString>
+    <Expand>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==1">(unsigned char *)Value.buffer,1</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==2">(unsigned char *)Value.buffer,2</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==4">(unsigned char *)Value.buffer,4</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==8">(unsigned char *)Value.buffer,8</Item>
+    </Expand>
+  </Type>
+
+  <!-- Now handle the hard case of big endian.  We need to do the swizzling here, but
+       we need to specialize it based on the size of the value type. -->
+  <Type Name="llvm::support::detail::packed_endian_specific_integral&lt;*,0,1&gt;">
+    <DisplayString Condition="sizeof($T1)==1">{{ big endian value = {*(unsigned char *)Value.buffer} }}</DisplayString>
+    <DisplayString Condition="sizeof($T1)==2">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 8)
+                                                                    | ($T1)(*((unsigned char *)Value.buffer+1))} }}</DisplayString>
+    <DisplayString Condition="sizeof($T1)==4">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 24)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+1)) &lt;&lt; 16)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+2)) &lt;&lt; 8)
+                                                                    |  ($T1)(*((unsigned char *)Value.buffer+3))} }}</DisplayString>
+    <DisplayString Condition="sizeof($T1)==8">{{ big endian value = {(($T1)(*(unsigned char *)Value.buffer) &lt;&lt; 56)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+1)) &lt;&lt; 48)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+2)) &lt;&lt; 40)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+3)) &lt;&lt; 32)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+4)) &lt;&lt; 24)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+5)) &lt;&lt; 16)
+                                                                    | (($T1)(*((unsigned char *)Value.buffer+6)) &lt;&lt; 8)
+                                                                    |  ($T1)(*((unsigned char *)Value.buffer+7))} }}</DisplayString>
+    <Expand>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==1">(unsigned char *)Value.buffer,1</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==2">(unsigned char *)Value.buffer,2</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==4">(unsigned char *)Value.buffer,4</Item>
+      <Item Name="[Raw Bytes]" Condition="sizeof($T1)==8">(unsigned char *)Value.buffer,8</Item>
+    </Expand>
+  </Type>
+  <!-- llvm::Type has two fields, SubclassData and ContainedTys, the meaning of which change depending on the TypeID.
+       This visualiser decodes those fields based on the value of ID.
+  -->
+  <Type Name="llvm::Type">
+    <DisplayString>{ID}</DisplayString>
+    <Expand>
+      <Item Name="ID">ID</Item>
+
+      <Item Name="NumBits" Condition="ID == llvm::Type::TypeID::IntegerTyID">SubclassData</Item>
+
+      <Item Name="ReturnType" Condition="ID == llvm::Type::TypeID::FunctionTyID">*ContainedTys</Item>
+      <Synthetic Name="Arguments" Condition="ID == llvm::Type::TypeID::FunctionTyID">
+        <DisplayString>{NumContainedTys - 1}</DisplayString>
+        <Expand>
+        <ArrayItems>
+          <Size>NumContainedTys - 1</Size>
+          <ValuePointer>ContainedTys + 1</ValuePointer>
+        </ArrayItems>
+        </Expand>
+      </Synthetic>
+      <Item Name="IsVarArg" Condition="ID == llvm::Type::TypeID::FunctionTyID">SubclassData == 1</Item>
+
+      <Item Name="HasBody" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_HasBody) != 0</Item>
+      <Item Name="Packed" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_Packed) != 0</Item>
+      <Item Name="IsLiteral" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_IsLiteral) != 0</Item>
+      <Item Name="IsSized" Condition="ID == llvm::Type::TypeID::StructTyID">(SubclassData 	&amp; llvm::StructType::SCDB_IsSized) != 0</Item>
+      <Synthetic Name="Members" Condition="ID == llvm::Type::TypeID::StructTyID">
+        <DisplayString>{NumContainedTys}</DisplayString>
+        <Expand>
+          <ArrayItems>
+            <Size>NumContainedTys</Size>
+            <ValuePointer>ContainedTys</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+
+      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::ArrayTyID">*ContainedTys</Item>
+      <Item Name="NumElements" Condition="ID == llvm::Type::TypeID::ArrayTyID">((llvm::ArrayType*)this)->NumElements</Item>
+
+      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::FixedVectorTyID">*ContainedTys</Item>
+      <Item Name="NumElements" Condition="ID == llvm::Type::TypeID::FixedVectorTyID">((llvm::VectorType*)this)->ElementQuantity</Item>
+
+      <Item Name="ElementType" Condition="ID == llvm::Type::TypeID::ScalableVectorTyID">*ContainedTys</Item>
+      <Item Name="MinNumElements" Condition="ID == llvm::Type::TypeID::ScalableVectorTyID">((llvm::VectorType*)this)->ElementQuantity</Item>
+
+      <Item Name="AddressSpace" Condition="ID == llvm::Type::TypeID::PointerTyID">SubclassData</Item>
+      <Item Name="PointeeType" Condition="ID == llvm::Type::TypeID::PointerTyID">*ContainedTys</Item>
+
+      <Item Name="Context">Context</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::ConstantSDNode">
+    <DisplayString>$(Type) {*Value}</DisplayString>
+  </Type>
+
+  <Type Name="llvm::SDNode">
+    <DisplayString>$(Type) {(llvm::ISD::NodeType)this->NodeType}</DisplayString>
+    <Expand>
+      <ArrayItems>
+        <Size>NumOperands</Size>
+        <ValuePointer>OperandList</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::ConstantInt">
+    <DisplayString>i{Val.BitWidth} {Val.VAL}</DisplayString>
+  </Type>
+
+  <Type Name="llvm::IntegerType">
+    <DisplayString>{IDAndSubclassData >> 8}bit integer type</DisplayString>
+  </Type>
+
+  <Type Name="llvm::Value">
+    <DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
+    <DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
+    <Expand>
+      <Item Name="[Inst]" Condition="SubclassID > InstructionVal">(Instruction*)this</Item>
+      <Item Name="Operands">(User*)this</Item>
+      <LinkedListItems>
+        <HeadPointer>UseList</HeadPointer>
+        <NextPointer>Next</NextPointer>
+        <ValueNode>Prev.Value &amp; 3 == 3 ? (User*)(this + 1) : (User*)(this + 2)</ValueNode>
+      </LinkedListItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Use">
+    <Expand>
+      <Item Name="Value">Val</Item>
+      <!--
+      <LinkedListItems>
+        <HeadPointer>this</HeadPointer>
+        <NextPointer>Next</NextPointer>
+        <ValueNode>Prev.Value &amp; 3 == 3 ? (User*)(this + 1) : (User*)(this + 2)</ValueNode>
+      </LinkedListItems>
+      -->
+    </Expand>
+  </Type>
+
+  <!-- uses other values, like Operands -->
+  <Type Name="llvm::User">
+    <DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
+    <DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
+    <Expand>
+      <Item Name="[Value]">(Value*)this,nd</Item>
+      <Item Name="[Type]">*VTy</Item>
+      <ArrayItems Condition="!HasHungOffUses">
+        <Size>NumUserOperands</Size>
+        <ValuePointer>(llvm::Use*)this - NumUserOperands</ValuePointer>
+      </ArrayItems>
+      <ArrayItems Condition="HasHungOffUses">
+        <Size>NumUserOperands</Size>
+        <ValuePointer>*((llvm::Use**)this - 1)</ValuePointer>
+      </ArrayItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Instruction">
+    <DisplayString>{getOpcodeName(SubclassID - InstructionVal)}</DisplayString>
+    <Expand>
+      <Item Name="[User]">(User*)this,nd</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::GlobalValue">
+    <DisplayString>{this->getName()} {(LinkageTypes)Linkage} {(VisibilityTypes)Visibility} {(DLLStorageClassTypes)DllStorageClass} {(llvm::GlobalValue::ThreadLocalMode) ThreadLocal}</DisplayString>
+  </Type>
+
+  <!-- TODO doesn't work cause it doesn't know the dynamic type -->
+  <Type Name="llvm::ilist_node">
+    <Expand>
+      <LinkedListItems>
+        <HeadPointer>this</HeadPointer>
+        <NextPointer>Next</NextPointer>
+        <ValueNode>this</ValueNode>
+      </LinkedListItems>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::LLVMContext">
+    <Expand>
+      <ExpandedItem>pImpl</ExpandedItem>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Module">
+    <DisplayString>{ModuleID,s8} {TargetTriple}</DisplayString>
+  </Type>
+
+  <Type Name="llvm::Pass">
+    <DisplayString>$(Type) {PassID} {Kind}</DisplayString>
+  </Type>
+</AutoVisualizer>
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
index 85dfd7738c17..d54b12e3a20d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
@@ -19,6 +19,7 @@ static_library("Vectorize") {
     "SandboxVectorizer/Passes/RegionsFromMetadata.cpp",
     "SandboxVectorizer/SandboxVectorizer.cpp",
     "SandboxVectorizer/SandboxVectorizerPassBuilder.cpp",
+    "SandboxVectorizer/Scheduler.cpp",
     "SandboxVectorizer/SeedCollector.cpp",
     "VPlan.cpp",
     "VPlanAnalysis.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
index a01525a0c80b..44640c6527c9 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
@@ -13,5 +13,6 @@ unittest("SandboxVectorizerTests") {
     "DependencyGraphTest.cpp",
     "IntervalTest.cpp",
     "LegalityTest.cpp",
+    "SchedulerTest.cpp",
   ]
 }
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/.gitattributes b/llvm/utils/lit/tests/Inputs/shtest-shell/.gitattributes
deleted file mode 100644
index 2df17345df5b..000000000000
--- a/llvm/utils/lit/tests/Inputs/shtest-shell/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.dos text eol=crlf
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.dos b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.dos
index 0f25621c787e..7a0560654c5c 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.dos
+++ b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.dos
@@ -1,3 +1,3 @@
-In this file, the
-sequence "\r\n"
-terminates lines.
+In this file, the
+sequence "\r\n"
+terminates lines.
diff --git a/llvm/utils/release/build_llvm_release.bat b/llvm/utils/release/build_llvm_release.bat
index 3718673ae7a2..dd041d7d384e 100755
--- a/llvm/utils/release/build_llvm_release.bat
+++ b/llvm/utils/release/build_llvm_release.bat
@@ -1,515 +1,515 @@
-@echo off
-setlocal enabledelayedexpansion
-
-goto begin
-
-:usage
-echo Script for building the LLVM installer on Windows,
-echo used for the releases at https://github.com/llvm/llvm-project/releases
-echo.
-echo Usage: build_llvm_release.bat --version ^<version^> [--x86,--x64, --arm64] [--skip-checkout] [--local-python]
-echo.
-echo Options:
-echo --version: [required] version to build
-echo --help: display this help
-echo --x86: build and test x86 variant
-echo --x64: build and test x64 variant
-echo --arm64: build and test arm64 variant
-echo --skip-checkout: use local git checkout instead of downloading src.zip
-echo --local-python: use installed Python and does not try to use a specific version (3.10)
-echo.
-echo Note: At least one variant to build is required.
-echo.
-echo Example: build_llvm_release.bat --version 15.0.0 --x86 --x64
-exit /b 1
-
-:begin
-
-::==============================================================================
-:: parse args
-set version=
-set help=
-set x86=
-set x64=
-set arm64=
-set skip-checkout=
-set local-python=
-call :parse_args %*
-
-if "%help%" NEQ "" goto usage
-
-if "%version%" == "" (
-    echo --version option is required
-    echo =============================
-    goto usage
-)
-
-if "%arm64%" == "" if "%x64%" == "" if "%x86%" == "" (
-    echo nothing to build!
-    echo choose one or several variants from: --x86 --x64 --arm64
-    exit /b 1
-)
-
-::==============================================================================
-:: check prerequisites
-REM Note:
-REM   7zip versions 21.x and higher will try to extract the symlinks in
-REM   llvm's git archive, which requires running as administrator.
-
-REM Check 7-zip version and/or administrator permissions.
-for /f "delims=" %%i in ('7z.exe ^| findstr /r "2[1-9].[0-9][0-9]"') do set version_7z=%%i
-if not "%version_7z%"=="" (
-  REM Unique temporary filename to use by the 'mklink' command.
-  set "link_name=%temp%\%username%_%random%_%random%.tmp"
-
-  REM As the 'mklink' requires elevated permissions, the symbolic link
-  REM creation will fail if the script is not running as administrator.
-  mklink /d "!link_name!" . 1>nul 2>nul
-  if errorlevel 1 (
-    echo.
-    echo Script requires administrator permissions, or a 7-zip version 20.x or older.
-    echo Current version is "%version_7z%"
-    exit /b 1
-  ) else (
-    REM Remove the temporary symbolic link.
-    rd "!link_name!"
-  )
-)
-
-REM Prerequisites:
-REM
-REM   Visual Studio 2019, CMake, Ninja, GNUWin32, SWIG, Python 3,
-REM   NSIS with the strlen_8192 patch,
-REM   Perl (for the OpenMP run-time).
-REM
-REM
-REM   For LLDB, SWIG version 4.1.1 should be used.
-REM
-
-:: Detect Visual Studio
-set vsinstall=
-set vswhere=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe
-
-if "%VSINSTALLDIR%" NEQ "" (
-  echo using enabled Visual Studio installation
-  set "vsinstall=%VSINSTALLDIR%"
-) else (
-  echo using vswhere to detect Visual Studio installation
-  FOR /F "delims=" %%r IN ('^""%vswhere%" -nologo -latest -products "*" -all -property installationPath^"') DO set vsinstall=%%r
-)
-set "vsdevcmd=%vsinstall%\Common7\Tools\VsDevCmd.bat"
-
-if not exist "%vsdevcmd%" (
-  echo Can't find any installation of Visual Studio
-  exit /b 1
-)
-echo Using VS devcmd: %vsdevcmd%
-
-::==============================================================================
-:: start echoing what we do
-@echo on
-
-set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310-32
-set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310
-set pythonarm64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python311-arm64
-
-set revision=llvmorg-%version%
-set package_version=%version%
-set build_dir=%cd%\llvm_package_%package_version%
-
-echo Revision: %revision%
-echo Package version: %package_version%
-echo Build dir: %build_dir%
-echo.
-
-if exist %build_dir% (
-  echo Build directory already exists: %build_dir%
-  exit /b 1
-)
-mkdir %build_dir%
-cd %build_dir% || exit /b 1
-
-if "%skip-checkout%" == "true" (
-  echo Using local source
-  set llvm_src=%~dp0..\..\..
-) else (
-  echo Checking out %revision%
-  curl -L https://github.com/llvm/llvm-project/archive/%revision%.zip -o src.zip || exit /b 1
-  7z x src.zip || exit /b 1
-  mv llvm-project-* llvm-project || exit /b 1
-  set llvm_src=%build_dir%\llvm-project
-)
-
-curl -O https://gitlab.gnome.org/GNOME/libxml2/-/archive/v2.9.12/libxml2-v2.9.12.tar.gz || exit /b 1
-tar zxf libxml2-v2.9.12.tar.gz
-
-REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
-REM Common flags for all builds.
-set common_compiler_flags=-DLIBXML_STATIC
-set common_cmake_flags=^
-  -DCMAKE_BUILD_TYPE=Release ^
-  -DLLVM_ENABLE_ASSERTIONS=OFF ^
-  -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^
-  -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86" ^
-  -DLLVM_BUILD_LLVM_C_DYLIB=ON ^
-  -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^
-  -DPython3_FIND_REGISTRY=NEVER ^
-  -DPACKAGE_VERSION=%package_version% ^
-  -DLLDB_RELOCATABLE_PYTHON=1 ^
-  -DLLDB_EMBED_PYTHON_HOME=OFF ^
-  -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " ^
-  -DLLVM_ENABLE_LIBXML2=FORCE_ON ^
-  -DLLDB_ENABLE_LIBXML2=OFF ^
-  -DCLANG_ENABLE_LIBXML2=OFF ^
-  -DCMAKE_C_FLAGS="%common_compiler_flags%" ^
-  -DCMAKE_CXX_FLAGS="%common_compiler_flags%" ^
-  -DLLVM_ENABLE_RPMALLOC=ON ^
-  -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;compiler-rt;lldb;openmp"
-
-set cmake_profile_flags=""
-
-REM Preserve original path
-set OLDPATH=%PATH%
-
-REM Build the 32-bits and/or 64-bits binaries.
-if "%x86%" == "true" call :do_build_32 || exit /b 1
-if "%x64%" == "true" call :do_build_64 || exit /b 1
-if "%arm64%" == "true" call :do_build_arm64 || exit /b 1
-exit /b 0
-
-::==============================================================================
-:: Build 32-bits binaries.
-::==============================================================================
-:do_build_32
-call :set_environment %python32_dir% || exit /b 1
-call "%vsdevcmd%" -arch=x86 || exit /b 1
-@echo on
-mkdir build32_stage0
-cd build32_stage0
-call :do_build_libxml || exit /b 1
-
-REM Stage0 binaries directory; used in stage1.
-set "stage0_bin_dir=%build_dir%/build32_stage0/bin"
-set cmake_flags=^
-  %common_cmake_flags% ^
-  -DLLVM_ENABLE_RPMALLOC=OFF ^
-  -DLLDB_TEST_COMPILER=%stage0_bin_dir%/clang.exe ^
-  -DPYTHON_HOME=%PYTHONHOME% ^
-  -DPython3_ROOT_DIR=%PYTHONHOME% ^
-  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
-  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib
-
-cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
-ninja || ninja || ninja || exit /b 1
-REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
-REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
-ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
-ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
-REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
-cd..
-
-REM CMake expects the paths that specifies the compiler and linker to be
-REM with forward slash.
-set all_cmake_flags=^
-  %cmake_flags% ^
-  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
-  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
-  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe
-set cmake_flags=%all_cmake_flags:\=/%
-
-mkdir build32
-cd build32
-cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
-ninja || ninja || ninja || exit /b 1
-REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
-REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
-ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
-ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
-REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
-ninja package || exit /b 1
-cd ..
-
-exit /b 0
-::==============================================================================
-
-::==============================================================================
-:: Build 64-bits binaries.
-::==============================================================================
-:do_build_64
-call :set_environment %python64_dir% || exit /b 1
-call "%vsdevcmd%" -arch=amd64 || exit /b 1
-@echo on
-mkdir build64_stage0
-cd build64_stage0
-call :do_build_libxml || exit /b 1
-
-REM Stage0 binaries directory; used in stage1.
-set "stage0_bin_dir=%build_dir%/build64_stage0/bin"
-set cmake_flags=^
-  %common_cmake_flags% ^
-  -DLLDB_TEST_COMPILER=%stage0_bin_dir%/clang.exe ^
-  -DPYTHON_HOME=%PYTHONHOME% ^
-  -DPython3_ROOT_DIR=%PYTHONHOME% ^
-  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
-  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib
-
-cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
-ninja || ninja || ninja || exit /b 1
-ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
-ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
-ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
-ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
-ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
-ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b 1
-cd..
-
-REM CMake expects the paths that specifies the compiler and linker to be
-REM with forward slash.
-set all_cmake_flags=^
-  %cmake_flags% ^
-  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
-  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
-  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe
-set cmake_flags=%all_cmake_flags:\=/%
-
-
-mkdir build64
-cd build64
-call :do_generate_profile || exit /b 1
-cmake -GNinja %cmake_flags% %cmake_profile_flags% %llvm_src%\llvm || exit /b 1
-ninja || ninja || ninja || exit /b 1
-ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
-ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
-ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
-ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
-ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
-ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b 1
-ninja package || exit /b 1
-
-:: generate tarball with install toolchain only off
-set filename=clang+llvm-%version%-x86_64-pc-windows-msvc
-cmake -GNinja %cmake_flags% %cmake_profile_flags% -DLLVM_INSTALL_TOOLCHAIN_ONLY=OFF ^
-  -DCMAKE_INSTALL_PREFIX=%build_dir%/%filename% ..\llvm-project\llvm || exit /b 1
-ninja install || exit /b 1
-:: check llvm_config is present & returns something
-%build_dir%/%filename%/bin/llvm-config.exe --bindir || exit /b 1
-cd ..
-7z a -ttar -so %filename%.tar %filename% | 7z a -txz -si %filename%.tar.xz
-
-exit /b 0
-::==============================================================================
-
-::==============================================================================
-:: Build arm64 binaries.
-::==============================================================================
-:do_build_arm64
-call :set_environment %pythonarm64_dir% || exit /b 1
-call "%vsdevcmd%" -host_arch=x64 -arch=arm64 || exit /b 1
-@echo on
-mkdir build_arm64_stage0
-cd build_arm64_stage0
-call :do_build_libxml || exit /b 1
-
-REM Stage0 binaries directory; used in stage1.
-set "stage0_bin_dir=%build_dir%/build_arm64_stage0/bin"
-set cmake_flags=^
-  %common_cmake_flags% ^
-  -DCLANG_DEFAULT_LINKER=lld ^
-  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
-  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib ^
-  -DPython3_ROOT_DIR=%PYTHONHOME% ^
-  -DCOMPILER_RT_BUILD_PROFILE=OFF ^
-  -DCOMPILER_RT_BUILD_SANITIZERS=OFF
-
-REM We need to build stage0 compiler-rt with clang-cl (msvc lacks some builtins).
-cmake -GNinja %cmake_flags% ^
-  -DCMAKE_C_COMPILER=clang-cl.exe ^
-  -DCMAKE_CXX_COMPILER=clang-cl.exe ^
-  %llvm_src%\llvm || exit /b 1
-ninja || exit /b 1
-::ninja check-llvm || exit /b 1
-::ninja check-clang || exit /b 1
-::ninja check-lld || exit /b 1
-::ninja check-sanitizer || exit /b 1
-::ninja check-clang-tools || exit /b 1
-::ninja check-clangd || exit /b 1
-cd..
-
-REM CMake expects the paths that specifies the compiler and linker to be
-REM with forward slash.
-REM CPACK_SYSTEM_NAME is set to have a correct name for installer generated.
-set all_cmake_flags=^
-  %cmake_flags% ^
-  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
-  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
-  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
-  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe ^
-  -DCPACK_SYSTEM_NAME=woa64
-set cmake_flags=%all_cmake_flags:\=/%
-
-mkdir build_arm64
-cd build_arm64
-cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
-ninja || exit /b 1
-REM Check but do not fail on errors.
-ninja check-lldb
-::ninja check-llvm || exit /b 1
-::ninja check-clang || exit /b 1
-::ninja check-lld || exit /b 1
-::ninja check-sanitizer || exit /b 1
-::ninja check-clang-tools || exit /b 1
-::ninja check-clangd || exit /b 1
-ninja package || exit /b 1
-cd ..
-
-exit /b 0
-::==============================================================================
-::
-::==============================================================================
-:: Set PATH and some environment variables.
-::==============================================================================
-:set_environment
-REM Restore original path
-set PATH=%OLDPATH%
-
-set python_dir=%1
-
-REM Set Python environment
-if "%local-python%" == "true" (
-  FOR /F "delims=" %%i IN ('where python.exe ^| head -1') DO set python_exe=%%i
-  set PYTHONHOME=!python_exe:~0,-11!
-) else (
-  %python_dir%/python.exe --version || exit /b 1
-  set PYTHONHOME=%python_dir%
-)
-set PATH=%PYTHONHOME%;%PATH%
-
-set "VSCMD_START_DIR=%build_dir%"
-
-exit /b 0
-
-::=============================================================================
-
-::==============================================================================
-:: Build libxml.
-::==============================================================================
-:do_build_libxml
-mkdir libxmlbuild
-cd libxmlbuild
-cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install ^
-  -DBUILD_SHARED_LIBS=OFF -DLIBXML2_WITH_C14N=OFF -DLIBXML2_WITH_CATALOG=OFF ^
-  -DLIBXML2_WITH_DEBUG=OFF -DLIBXML2_WITH_DOCB=OFF -DLIBXML2_WITH_FTP=OFF ^
-  -DLIBXML2_WITH_HTML=OFF -DLIBXML2_WITH_HTTP=OFF -DLIBXML2_WITH_ICONV=OFF ^
-  -DLIBXML2_WITH_ICU=OFF -DLIBXML2_WITH_ISO8859X=OFF -DLIBXML2_WITH_LEGACY=OFF ^
-  -DLIBXML2_WITH_LZMA=OFF -DLIBXML2_WITH_MEM_DEBUG=OFF -DLIBXML2_WITH_MODULES=OFF ^
-  -DLIBXML2_WITH_OUTPUT=ON -DLIBXML2_WITH_PATTERN=OFF -DLIBXML2_WITH_PROGRAMS=OFF ^
-  -DLIBXML2_WITH_PUSH=OFF -DLIBXML2_WITH_PYTHON=OFF -DLIBXML2_WITH_READER=OFF ^
-  -DLIBXML2_WITH_REGEXPS=OFF -DLIBXML2_WITH_RUN_DEBUG=OFF -DLIBXML2_WITH_SAX1=OFF ^
-  -DLIBXML2_WITH_SCHEMAS=OFF -DLIBXML2_WITH_SCHEMATRON=OFF -DLIBXML2_WITH_TESTS=OFF ^
-  -DLIBXML2_WITH_THREADS=ON -DLIBXML2_WITH_THREAD_ALLOC=OFF -DLIBXML2_WITH_TREE=ON ^
-  -DLIBXML2_WITH_VALID=OFF -DLIBXML2_WITH_WRITER=OFF -DLIBXML2_WITH_XINCLUDE=OFF ^
-  -DLIBXML2_WITH_XPATH=OFF -DLIBXML2_WITH_XPTR=OFF -DLIBXML2_WITH_ZLIB=OFF ^
-  -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded ^
-  ../../libxml2-v2.9.12 || exit /b 1
-ninja install || exit /b 1
-set libxmldir=%cd%\install
-set "libxmldir=%libxmldir:\=/%"
-cd ..
-exit /b 0
-
-::==============================================================================
-:: Generate a PGO profile.
-::==============================================================================
-:do_generate_profile
-REM Build Clang with instrumentation.
-mkdir instrument
-cd instrument
-cmake -GNinja %cmake_flags% -DLLVM_TARGETS_TO_BUILD=Native ^
-  -DLLVM_BUILD_INSTRUMENTED=IR %llvm_src%\llvm || exit /b 1
-ninja clang || ninja clang || ninja clang || exit /b 1
-set instrumented_clang=%cd:\=/%/bin/clang-cl.exe
-cd ..
-REM Use that to build part of llvm to generate a profile.
-mkdir train
-cd train
-cmake -GNinja %cmake_flags% ^
-  -DCMAKE_C_COMPILER=%instrumented_clang% ^
-  -DCMAKE_CXX_COMPILER=%instrumented_clang% ^
-  -DLLVM_ENABLE_PROJECTS=clang ^
-  -DLLVM_TARGETS_TO_BUILD=Native ^
-  %llvm_src%\llvm || exit /b 1
-REM Drop profiles generated from running cmake; those are not representative.
-del ..\instrument\profiles\*.profraw
-ninja tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.obj
-cd ..
-set profile=%cd:\=/%/profile.profdata
-%stage0_bin_dir%\llvm-profdata merge -output=%profile% instrument\profiles\*.profraw || exit /b 1
-set common_compiler_flags=%common_compiler_flags% -Wno-backend-plugin
-set cmake_profile_flags=-DLLVM_PROFDATA_FILE=%profile% ^
-  -DCMAKE_C_FLAGS="%common_compiler_flags%" ^
-  -DCMAKE_CXX_FLAGS="%common_compiler_flags%"
-exit /b 0
-
-::=============================================================================
-:: Parse command line arguments.
-:: The format for the arguments is:
-::   Boolean: --option
-::   Value:   --option<separator>value
-::     with <separator> being: space, colon, semicolon or equal sign
-::
-:: Command line usage example:
-::   my-batch-file.bat --build --type=release --version 123
-:: It will create 3 variables:
-::   'build' with the value 'true'
-::   'type' with the value 'release'
-::   'version' with the value '123'
-::
-:: Usage:
-::   set "build="
-::   set "type="
-::   set "version="
-::
-::   REM Parse arguments.
-::   call :parse_args %*
-::
-::   if defined build (
-::     ...
-::   )
-::   if %type%=='release' (
-::     ...
-::   )
-::   if %version%=='123' (
-::     ...
-::   )
-::=============================================================================
-:parse_args
-  set "arg_name="
-  :parse_args_start
-  if "%1" == "" (
-    :: Set a seen boolean argument.
-    if "%arg_name%" neq "" (
-      set "%arg_name%=true"
-    )
-    goto :parse_args_done
-  )
-  set aux=%1
-  if "%aux:~0,2%" == "--" (
-    :: Set a seen boolean argument.
-    if "%arg_name%" neq "" (
-      set "%arg_name%=true"
-    )
-    set "arg_name=%aux:~2,250%"
-  ) else (
-    set "%arg_name%=%1"
-    set "arg_name="
-  )
-  shift
-  goto :parse_args_start
-
-:parse_args_done
-exit /b 0
+@echo off
+setlocal enabledelayedexpansion
+
+goto begin
+
+:usage
+echo Script for building the LLVM installer on Windows,
+echo used for the releases at https://github.com/llvm/llvm-project/releases
+echo.
+echo Usage: build_llvm_release.bat --version ^<version^> [--x86,--x64, --arm64] [--skip-checkout] [--local-python]
+echo.
+echo Options:
+echo --version: [required] version to build
+echo --help: display this help
+echo --x86: build and test x86 variant
+echo --x64: build and test x64 variant
+echo --arm64: build and test arm64 variant
+echo --skip-checkout: use local git checkout instead of downloading src.zip
+echo --local-python: use installed Python and does not try to use a specific version (3.10)
+echo.
+echo Note: At least one variant to build is required.
+echo.
+echo Example: build_llvm_release.bat --version 15.0.0 --x86 --x64
+exit /b 1
+
+:begin
+
+::==============================================================================
+:: parse args
+set version=
+set help=
+set x86=
+set x64=
+set arm64=
+set skip-checkout=
+set local-python=
+call :parse_args %*
+
+if "%help%" NEQ "" goto usage
+
+if "%version%" == "" (
+    echo --version option is required
+    echo =============================
+    goto usage
+)
+
+if "%arm64%" == "" if "%x64%" == "" if "%x86%" == "" (
+    echo nothing to build!
+    echo choose one or several variants from: --x86 --x64 --arm64
+    exit /b 1
+)
+
+::==============================================================================
+:: check prerequisites
+REM Note:
+REM   7zip versions 21.x and higher will try to extract the symlinks in
+REM   llvm's git archive, which requires running as administrator.
+
+REM Check 7-zip version and/or administrator permissions.
+for /f "delims=" %%i in ('7z.exe ^| findstr /r "2[1-9].[0-9][0-9]"') do set version_7z=%%i
+if not "%version_7z%"=="" (
+  REM Unique temporary filename to use by the 'mklink' command.
+  set "link_name=%temp%\%username%_%random%_%random%.tmp"
+
+  REM As the 'mklink' requires elevated permissions, the symbolic link
+  REM creation will fail if the script is not running as administrator.
+  mklink /d "!link_name!" . 1>nul 2>nul
+  if errorlevel 1 (
+    echo.
+    echo Script requires administrator permissions, or a 7-zip version 20.x or older.
+    echo Current version is "%version_7z%"
+    exit /b 1
+  ) else (
+    REM Remove the temporary symbolic link.
+    rd "!link_name!"
+  )
+)
+
+REM Prerequisites:
+REM
+REM   Visual Studio 2019, CMake, Ninja, GNUWin32, SWIG, Python 3,
+REM   NSIS with the strlen_8192 patch,
+REM   Perl (for the OpenMP run-time).
+REM
+REM
+REM   For LLDB, SWIG version 4.1.1 should be used.
+REM
+
+:: Detect Visual Studio
+set vsinstall=
+set vswhere=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe
+
+if "%VSINSTALLDIR%" NEQ "" (
+  echo using enabled Visual Studio installation
+  set "vsinstall=%VSINSTALLDIR%"
+) else (
+  echo using vswhere to detect Visual Studio installation
+  FOR /F "delims=" %%r IN ('^""%vswhere%" -nologo -latest -products "*" -all -property installationPath^"') DO set vsinstall=%%r
+)
+set "vsdevcmd=%vsinstall%\Common7\Tools\VsDevCmd.bat"
+
+if not exist "%vsdevcmd%" (
+  echo Can't find any installation of Visual Studio
+  exit /b 1
+)
+echo Using VS devcmd: %vsdevcmd%
+
+::==============================================================================
+:: start echoing what we do
+@echo on
+
+set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310-32
+set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python310
+set pythonarm64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python311-arm64
+
+set revision=llvmorg-%version%
+set package_version=%version%
+set build_dir=%cd%\llvm_package_%package_version%
+
+echo Revision: %revision%
+echo Package version: %package_version%
+echo Build dir: %build_dir%
+echo.
+
+if exist %build_dir% (
+  echo Build directory already exists: %build_dir%
+  exit /b 1
+)
+mkdir %build_dir%
+cd %build_dir% || exit /b 1
+
+if "%skip-checkout%" == "true" (
+  echo Using local source
+  set llvm_src=%~dp0..\..\..
+) else (
+  echo Checking out %revision%
+  curl -L https://github.com/llvm/llvm-project/archive/%revision%.zip -o src.zip || exit /b 1
+  7z x src.zip || exit /b 1
+  mv llvm-project-* llvm-project || exit /b 1
+  set llvm_src=%build_dir%\llvm-project
+)
+
+curl -O https://gitlab.gnome.org/GNOME/libxml2/-/archive/v2.9.12/libxml2-v2.9.12.tar.gz || exit /b 1
+tar zxf libxml2-v2.9.12.tar.gz
+
+REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
+REM Common flags for all builds.
+set common_compiler_flags=-DLIBXML_STATIC
+set common_cmake_flags=^
+  -DCMAKE_BUILD_TYPE=Release ^
+  -DLLVM_ENABLE_ASSERTIONS=OFF ^
+  -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^
+  -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86" ^
+  -DLLVM_BUILD_LLVM_C_DYLIB=ON ^
+  -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^
+  -DPython3_FIND_REGISTRY=NEVER ^
+  -DPACKAGE_VERSION=%package_version% ^
+  -DLLDB_RELOCATABLE_PYTHON=1 ^
+  -DLLDB_EMBED_PYTHON_HOME=OFF ^
+  -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " ^
+  -DLLVM_ENABLE_LIBXML2=FORCE_ON ^
+  -DLLDB_ENABLE_LIBXML2=OFF ^
+  -DCLANG_ENABLE_LIBXML2=OFF ^
+  -DCMAKE_C_FLAGS="%common_compiler_flags%" ^
+  -DCMAKE_CXX_FLAGS="%common_compiler_flags%" ^
+  -DLLVM_ENABLE_RPMALLOC=ON ^
+  -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;compiler-rt;lldb;openmp"
+
+set cmake_profile_flags=""
+
+REM Preserve original path
+set OLDPATH=%PATH%
+
+REM Build the 32-bits and/or 64-bits binaries.
+if "%x86%" == "true" call :do_build_32 || exit /b 1
+if "%x64%" == "true" call :do_build_64 || exit /b 1
+if "%arm64%" == "true" call :do_build_arm64 || exit /b 1
+exit /b 0
+
+::==============================================================================
+:: Build 32-bits binaries.
+::==============================================================================
+:do_build_32
+call :set_environment %python32_dir% || exit /b 1
+call "%vsdevcmd%" -arch=x86 || exit /b 1
+@echo on
+mkdir build32_stage0
+cd build32_stage0
+call :do_build_libxml || exit /b 1
+
+REM Stage0 binaries directory; used in stage1.
+set "stage0_bin_dir=%build_dir%/build32_stage0/bin"
+set cmake_flags=^
+  %common_cmake_flags% ^
+  -DLLVM_ENABLE_RPMALLOC=OFF ^
+  -DLLDB_TEST_COMPILER=%stage0_bin_dir%/clang.exe ^
+  -DPYTHON_HOME=%PYTHONHOME% ^
+  -DPython3_ROOT_DIR=%PYTHONHOME% ^
+  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
+  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib
+
+cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
+ninja || ninja || ninja || exit /b 1
+REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
+REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
+REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
+cd..
+
+REM CMake expects the paths that specifies the compiler and linker to be
+REM with forward slash.
+set all_cmake_flags=^
+  %cmake_flags% ^
+  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
+  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
+  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe
+set cmake_flags=%all_cmake_flags:\=/%
+
+mkdir build32
+cd build32
+cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
+ninja || ninja || ninja || exit /b 1
+REM ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
+REM ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
+REM ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
+ninja package || exit /b 1
+cd ..
+
+exit /b 0
+::==============================================================================
+
+::==============================================================================
+:: Build 64-bits binaries.
+::==============================================================================
+:do_build_64
+call :set_environment %python64_dir% || exit /b 1
+call "%vsdevcmd%" -arch=amd64 || exit /b 1
+@echo on
+mkdir build64_stage0
+cd build64_stage0
+call :do_build_libxml || exit /b 1
+
+REM Stage0 binaries directory; used in stage1.
+set "stage0_bin_dir=%build_dir%/build64_stage0/bin"
+set cmake_flags=^
+  %common_cmake_flags% ^
+  -DLLDB_TEST_COMPILER=%stage0_bin_dir%/clang.exe ^
+  -DPYTHON_HOME=%PYTHONHOME% ^
+  -DPython3_ROOT_DIR=%PYTHONHOME% ^
+  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
+  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib
+
+cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
+ninja || ninja || ninja || exit /b 1
+ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
+ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
+ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b 1
+cd..
+
+REM CMake expects the paths that specifies the compiler and linker to be
+REM with forward slash.
+set all_cmake_flags=^
+  %cmake_flags% ^
+  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
+  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
+  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe
+set cmake_flags=%all_cmake_flags:\=/%
+
+
+mkdir build64
+cd build64
+call :do_generate_profile || exit /b 1
+cmake -GNinja %cmake_flags% %cmake_profile_flags% %llvm_src%\llvm || exit /b 1
+ninja || ninja || ninja || exit /b 1
+ninja check-llvm || ninja check-llvm || ninja check-llvm || exit /b 1
+ninja check-clang || ninja check-clang || ninja check-clang || exit /b 1
+ninja check-lld || ninja check-lld || ninja check-lld || exit /b 1
+ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b 1
+ninja check-clang-tools || ninja check-clang-tools || ninja check-clang-tools || exit /b 1
+ninja check-clangd || ninja check-clangd || ninja check-clangd || exit /b 1
+ninja package || exit /b 1
+
+:: generate tarball with install toolchain only off
+set filename=clang+llvm-%version%-x86_64-pc-windows-msvc
+cmake -GNinja %cmake_flags% %cmake_profile_flags% -DLLVM_INSTALL_TOOLCHAIN_ONLY=OFF ^
+  -DCMAKE_INSTALL_PREFIX=%build_dir%/%filename% ..\llvm-project\llvm || exit /b 1
+ninja install || exit /b 1
+:: check llvm_config is present & returns something
+%build_dir%/%filename%/bin/llvm-config.exe --bindir || exit /b 1
+cd ..
+7z a -ttar -so %filename%.tar %filename% | 7z a -txz -si %filename%.tar.xz
+
+exit /b 0
+::==============================================================================
+
+::==============================================================================
+:: Build arm64 binaries.
+::==============================================================================
+:do_build_arm64
+call :set_environment %pythonarm64_dir% || exit /b 1
+call "%vsdevcmd%" -host_arch=x64 -arch=arm64 || exit /b 1
+@echo on
+mkdir build_arm64_stage0
+cd build_arm64_stage0
+call :do_build_libxml || exit /b 1
+
+REM Stage0 binaries directory; used in stage1.
+set "stage0_bin_dir=%build_dir%/build_arm64_stage0/bin"
+set cmake_flags=^
+  %common_cmake_flags% ^
+  -DCLANG_DEFAULT_LINKER=lld ^
+  -DLIBXML2_INCLUDE_DIR=%libxmldir%/include/libxml2 ^
+  -DLIBXML2_LIBRARIES=%libxmldir%/lib/libxml2s.lib ^
+  -DPython3_ROOT_DIR=%PYTHONHOME% ^
+  -DCOMPILER_RT_BUILD_PROFILE=OFF ^
+  -DCOMPILER_RT_BUILD_SANITIZERS=OFF
+
+REM We need to build stage0 compiler-rt with clang-cl (msvc lacks some builtins).
+cmake -GNinja %cmake_flags% ^
+  -DCMAKE_C_COMPILER=clang-cl.exe ^
+  -DCMAKE_CXX_COMPILER=clang-cl.exe ^
+  %llvm_src%\llvm || exit /b 1
+ninja || exit /b 1
+::ninja check-llvm || exit /b 1
+::ninja check-clang || exit /b 1
+::ninja check-lld || exit /b 1
+::ninja check-sanitizer || exit /b 1
+::ninja check-clang-tools || exit /b 1
+::ninja check-clangd || exit /b 1
+cd..
+
+REM CMake expects the paths that specifies the compiler and linker to be
+REM with forward slash.
+REM CPACK_SYSTEM_NAME is set to have a correct name for installer generated.
+set all_cmake_flags=^
+  %cmake_flags% ^
+  -DCMAKE_C_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_CXX_COMPILER=%stage0_bin_dir%/clang-cl.exe ^
+  -DCMAKE_LINKER=%stage0_bin_dir%/lld-link.exe ^
+  -DCMAKE_AR=%stage0_bin_dir%/llvm-lib.exe ^
+  -DCMAKE_RC=%stage0_bin_dir%/llvm-windres.exe ^
+  -DCPACK_SYSTEM_NAME=woa64
+set cmake_flags=%all_cmake_flags:\=/%
+
+mkdir build_arm64
+cd build_arm64
+cmake -GNinja %cmake_flags% %llvm_src%\llvm || exit /b 1
+ninja || exit /b 1
+REM Check but do not fail on errors.
+ninja check-lldb
+::ninja check-llvm || exit /b 1
+::ninja check-clang || exit /b 1
+::ninja check-lld || exit /b 1
+::ninja check-sanitizer || exit /b 1
+::ninja check-clang-tools || exit /b 1
+::ninja check-clangd || exit /b 1
+ninja package || exit /b 1
+cd ..
+
+exit /b 0
+::==============================================================================
+::
+::==============================================================================
+:: Set PATH and some environment variables.
+::==============================================================================
+:set_environment
+REM Restore original path
+set PATH=%OLDPATH%
+
+set python_dir=%1
+
+REM Set Python environment
+if "%local-python%" == "true" (
+  FOR /F "delims=" %%i IN ('where python.exe ^| head -1') DO set python_exe=%%i
+  set PYTHONHOME=!python_exe:~0,-11!
+) else (
+  %python_dir%/python.exe --version || exit /b 1
+  set PYTHONHOME=%python_dir%
+)
+set PATH=%PYTHONHOME%;%PATH%
+
+set "VSCMD_START_DIR=%build_dir%"
+
+exit /b 0
+
+::=============================================================================
+
+::==============================================================================
+:: Build libxml.
+::==============================================================================
+:do_build_libxml
+mkdir libxmlbuild
+cd libxmlbuild
+cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install ^
+  -DBUILD_SHARED_LIBS=OFF -DLIBXML2_WITH_C14N=OFF -DLIBXML2_WITH_CATALOG=OFF ^
+  -DLIBXML2_WITH_DEBUG=OFF -DLIBXML2_WITH_DOCB=OFF -DLIBXML2_WITH_FTP=OFF ^
+  -DLIBXML2_WITH_HTML=OFF -DLIBXML2_WITH_HTTP=OFF -DLIBXML2_WITH_ICONV=OFF ^
+  -DLIBXML2_WITH_ICU=OFF -DLIBXML2_WITH_ISO8859X=OFF -DLIBXML2_WITH_LEGACY=OFF ^
+  -DLIBXML2_WITH_LZMA=OFF -DLIBXML2_WITH_MEM_DEBUG=OFF -DLIBXML2_WITH_MODULES=OFF ^
+  -DLIBXML2_WITH_OUTPUT=ON -DLIBXML2_WITH_PATTERN=OFF -DLIBXML2_WITH_PROGRAMS=OFF ^
+  -DLIBXML2_WITH_PUSH=OFF -DLIBXML2_WITH_PYTHON=OFF -DLIBXML2_WITH_READER=OFF ^
+  -DLIBXML2_WITH_REGEXPS=OFF -DLIBXML2_WITH_RUN_DEBUG=OFF -DLIBXML2_WITH_SAX1=OFF ^
+  -DLIBXML2_WITH_SCHEMAS=OFF -DLIBXML2_WITH_SCHEMATRON=OFF -DLIBXML2_WITH_TESTS=OFF ^
+  -DLIBXML2_WITH_THREADS=ON -DLIBXML2_WITH_THREAD_ALLOC=OFF -DLIBXML2_WITH_TREE=ON ^
+  -DLIBXML2_WITH_VALID=OFF -DLIBXML2_WITH_WRITER=OFF -DLIBXML2_WITH_XINCLUDE=OFF ^
+  -DLIBXML2_WITH_XPATH=OFF -DLIBXML2_WITH_XPTR=OFF -DLIBXML2_WITH_ZLIB=OFF ^
+  -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded ^
+  ../../libxml2-v2.9.12 || exit /b 1
+ninja install || exit /b 1
+set libxmldir=%cd%\install
+set "libxmldir=%libxmldir:\=/%"
+cd ..
+exit /b 0
+
+::==============================================================================
+:: Generate a PGO profile.
+::==============================================================================
+:do_generate_profile
+REM Build Clang with instrumentation.
+mkdir instrument
+cd instrument
+cmake -GNinja %cmake_flags% -DLLVM_TARGETS_TO_BUILD=Native ^
+  -DLLVM_BUILD_INSTRUMENTED=IR %llvm_src%\llvm || exit /b 1
+ninja clang || ninja clang || ninja clang || exit /b 1
+set instrumented_clang=%cd:\=/%/bin/clang-cl.exe
+cd ..
+REM Use that to build part of llvm to generate a profile.
+mkdir train
+cd train
+cmake -GNinja %cmake_flags% ^
+  -DCMAKE_C_COMPILER=%instrumented_clang% ^
+  -DCMAKE_CXX_COMPILER=%instrumented_clang% ^
+  -DLLVM_ENABLE_PROJECTS=clang ^
+  -DLLVM_TARGETS_TO_BUILD=Native ^
+  %llvm_src%\llvm || exit /b 1
+REM Drop profiles generated from running cmake; those are not representative.
+del ..\instrument\profiles\*.profraw
+ninja tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.obj
+cd ..
+set profile=%cd:\=/%/profile.profdata
+%stage0_bin_dir%\llvm-profdata merge -output=%profile% instrument\profiles\*.profraw || exit /b 1
+set common_compiler_flags=%common_compiler_flags% -Wno-backend-plugin
+set cmake_profile_flags=-DLLVM_PROFDATA_FILE=%profile% ^
+  -DCMAKE_C_FLAGS="%common_compiler_flags%" ^
+  -DCMAKE_CXX_FLAGS="%common_compiler_flags%"
+exit /b 0
+
+::=============================================================================
+:: Parse command line arguments.
+:: The format for the arguments is:
+::   Boolean: --option
+::   Value:   --option<separator>value
+::     with <separator> being: space, colon, semicolon or equal sign
+::
+:: Command line usage example:
+::   my-batch-file.bat --build --type=release --version 123
+:: It will create 3 variables:
+::   'build' with the value 'true'
+::   'type' with the value 'release'
+::   'version' with the value '123'
+::
+:: Usage:
+::   set "build="
+::   set "type="
+::   set "version="
+::
+::   REM Parse arguments.
+::   call :parse_args %*
+::
+::   if defined build (
+::     ...
+::   )
+::   if %type%=='release' (
+::     ...
+::   )
+::   if %version%=='123' (
+::     ...
+::   )
+::=============================================================================
+:parse_args
+  set "arg_name="
+  :parse_args_start
+  if "%1" == "" (
+    :: Set a seen boolean argument.
+    if "%arg_name%" neq "" (
+      set "%arg_name%=true"
+    )
+    goto :parse_args_done
+  )
+  set aux=%1
+  if "%aux:~0,2%" == "--" (
+    :: Set a seen boolean argument.
+    if "%arg_name%" neq "" (
+      set "%arg_name%=true"
+    )
+    set "arg_name=%aux:~2,250%"
+  ) else (
+    set "%arg_name%=%1"
+    set "arg_name="
+  )
+  shift
+  goto :parse_args_start
+
+:parse_args_done
+exit /b 0
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index c6d44908a111..1e80daabddec 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -16,7 +16,7 @@ endif()
 
 # Must go below project(..)
 include(GNUInstallDirs)
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
 
 if(MLIR_STANDALONE_BUILD)
   find_package(LLVM CONFIG REQUIRED)
@@ -84,13 +84,20 @@ check_c_compiler_flag("-Werror=mismatched-tags" C_SUPPORTS_WERROR_MISMATCHED_TAG
 append_if(C_SUPPORTS_WERROR_MISMATCHED_TAGS "-Werror=mismatched-tags" CMAKE_C_FLAGS)
 append_if(C_SUPPORTS_WERROR_MISMATCHED_TAGS "-Werror=mismatched-tags" CMAKE_CXX_FLAGS)
 
-# Silence a false positive GCC -Wunused-but-set-parameter warning in constexpr
-# cases, by marking SelectedCase as used. See
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85827 for details. The issue is
-# fixed in GCC 10.
-if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0")
-  check_cxx_compiler_flag("-Wno-unused-but-set-parameter" CXX_SUPPORTS_WNO_UNUSED_BUT_SET_PARAMETER)
-  append_if(CXX_SUPPORTS_WNO_UNUSED_BUT_SET_PARAMETER "-Wno-unused-but-set-parameter" CMAKE_CXX_FLAGS)
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+  # Silence a false positive GCC -Wunused-but-set-parameter warning in
+  # constexpr cases. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85827
+  # for details
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "14.0")
+    check_cxx_compiler_flag("-Wno-unused-but-set-parameter" CXX_SUPPORTS_WNO_UNUSED_BUT_SET_PARAMETER)
+    append_if(CXX_SUPPORTS_WNO_UNUSED_BUT_SET_PARAMETER "-Wno-unused-but-set-parameter" CMAKE_CXX_FLAGS)
+  endif()
+  # Silence a false positive GCC -Wdeprecated-copy warning in cases where
+  # a copy operator is defined through "using" a base class copy operator.
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.0")
+    check_cxx_compiler_flag("-Wno-deprecated-copy" CXX_SUPPORTS_WNO_DEPRECTAED_COPY)
+    append_if(CXX_SUPPORTS_WNO_DEPRECTAED_COPY "-Wno-deprecated-copy" CMAKE_CXX_FLAGS)
+  endif()
 endif()
 
 # Installing the headers and docs needs to depend on generating any public
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 0915bbde3072..040c04b0410e 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2294,13 +2294,15 @@ def HoistRedundantVectorTransfersOp :
     function op.
   }];
 
-  let arguments = (ins TransformHandleTypeInterface:$target);
+  let arguments = (ins TransformHandleTypeInterface:$target,
+                   UnitAttr:$verify_non_zero_trip);
   let results = (outs TransformHandleTypeInterface:$transformed);
 
   let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
 
   let builders = [
-    OpBuilder<(ins "Value":$target)>,
+    OpBuilder<(ins "Value":$target,
+               CArg<"bool", "false">:$verify_non_zero_trip)>,
   ];
   let extraClassDeclaration = [{
     ::mlir::DiagnosedSilenceableFailure applyToOne(
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
index 236c2ce7d48e..4edf432d9d97 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
@@ -29,6 +29,9 @@ namespace linalg {
 ///   4. The source operands for vector.transfer_{read|write} do not originate
 ///   from Ops implementing ViewLikeOpInterface (to reduce the risk of
 ///   aliasing).
+///   5. If `verifyNonZeroTrip` is true, then the lower bound of the loop must
+///   be statically smaller than the upper bound of the loop, guaranteeing that
+///   the loop body will execute at least once.
 /// To improve hoisting opportunities, call the `moveLoopInvariantCode` helper
 /// function on the candidate loop above which to hoist. Hoisting the transfers
 /// results in scf::ForOp yielding the value that originally transited through
@@ -41,7 +44,12 @@ namespace linalg {
 ///
 /// WARNING: This hoisting does not model parallelism and is generally incorrect
 /// when used on distributed loops with memref semantics!
-void hoistRedundantVectorTransfers(Operation *root);
+/// NOTE: Setting `verifyNonZeroTrip = true` makes this more stable for
+/// distributed loops with memref semantics, but there could still be some
+/// issues when loops are executed a different number of times for different
+/// threads.
+void hoistRedundantVectorTransfers(Operation *root,
+                                   bool verifyNonZeroTrip = false);
 
 /// Hoist vector.extract/vector.broadcast pairs out of immediately enclosing
 /// scf::ForOp iteratively, if the following conditions are met:
diff --git a/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h b/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h
index ba4f084d3efd..4d7aa1ae17fd 100644
--- a/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h
@@ -92,6 +92,12 @@ getConstantIntValues(ArrayRef<OpFoldResult> ofrs);
 
 /// Return true if `ofr` is constant integer equal to `value`.
 bool isConstantIntValue(OpFoldResult ofr, int64_t value);
+/// Return true if all of `ofrs` are constant integers equal to `value`.
+bool areAllConstantIntValue(ArrayRef<OpFoldResult> ofrs, int64_t value);
+/// Return true if all of `ofrs` are constant integers equal to the
+/// corresponding value in `values`.
+bool areConstantIntValues(ArrayRef<OpFoldResult> ofrs,
+                          ArrayRef<int64_t> values);
 
 /// Return true if ofr1 and ofr2 are the same integer constant attribute
 /// values or the same SSA value. Ignore integer bitwitdh and type mismatch
diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp
index 5eb531b70aee..e7e6b11c81b9 100644
--- a/mlir/lib/CAPI/IR/IR.cpp
+++ b/mlir/lib/CAPI/IR/IR.cpp
@@ -736,6 +736,7 @@ static mlir::WalkResult unwrap(MlirWalkResult result) {
   case MlirWalkResultSkip:
     return mlir::WalkResult::skip();
   }
+  llvm_unreachable("unknown result in WalkResult::unwrap");
 }
 
 void mlirOperationWalk(MlirOperation op, MlirOperationWalkCallback callback,
diff --git a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
index f1fa411b8291..40a3489f7a4d 100644
--- a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
+++ b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
@@ -81,6 +81,7 @@ static Operation *createLoadTileSliceIntrinsic(
       break;
     }
   }
+  llvm_unreachable("unknown type in createLoadTileSliceIntrinsic");
 }
 
 /// Helper to create an arm_sme.intr.st1*.(horiz|vert)' intrinsic.
@@ -125,6 +126,7 @@ static Operation *createStoreTileSliceIntrinsic(
           loc, maskOp, ptr, tileId, tileSliceI32);
     }
   }
+  llvm_unreachable("unknown type in createStoreTileSliceIntrinsic");
 }
 
 IntegerAttr getTileIdOrError(arm_sme::ArmSMETileOpInterface op) {
@@ -850,6 +852,7 @@ struct StreamingVLOpConversion
       case arm_sme::TypeSize::Double:
         return rewriter.create<arm_sme::aarch64_sme_cntsd>(loc, i64Type);
       }
+      llvm_unreachable("unknown type size in StreamingVLOpConversion");
     }();
     rewriter.replaceOpWithNewOp<arith::IndexCastOp>(
         streamingVlOp, rewriter.getIndexType(), intrOp->getResult(0));
diff --git a/mlir/lib/Conversion/IndexToSPIRV/IndexToSPIRV.cpp b/mlir/lib/Conversion/IndexToSPIRV/IndexToSPIRV.cpp
index b4cc8324883e..7c441830e1e3 100644
--- a/mlir/lib/Conversion/IndexToSPIRV/IndexToSPIRV.cpp
+++ b/mlir/lib/Conversion/IndexToSPIRV/IndexToSPIRV.cpp
@@ -310,6 +310,7 @@ struct ConvertIndexCmpPattern final : OpConversionPattern<CmpOp> {
     case IndexCmpPredicate::ULT:
       return rewriteCmpOp<spirv::ULessThanOp>(op, adaptor, rewriter);
     }
+    llvm_unreachable("Unknown predicate in ConvertIndexCmpPattern");
   }
 };
 
diff --git a/mlir/lib/Debug/DebuggerExecutionContextHook.cpp b/mlir/lib/Debug/DebuggerExecutionContextHook.cpp
index 744a0380ec71..863113928d5b 100644
--- a/mlir/lib/Debug/DebuggerExecutionContextHook.cpp
+++ b/mlir/lib/Debug/DebuggerExecutionContextHook.cpp
@@ -301,7 +301,7 @@ void mlirDebuggerAddFileLineColLocBreakpoint(const char *file, int line,
 
 LLVM_ATTRIBUTE_NOINLINE void mlirDebuggerBreakpointHook() {
   static LLVM_THREAD_LOCAL void *volatile sink;
-  sink = (void *)&sink;
+  sink = static_cast<void *>(const_cast<void **>(&sink));
 }
 
 static void preventLinkerDeadCodeElim() {
@@ -321,7 +321,7 @@ static void preventLinkerDeadCodeElim() {
     sink = (void *)mlirDebuggerAddTagBreakpoint;
     sink = (void *)mlirDebuggerAddRewritePatternBreakpoint;
     sink = (void *)mlirDebuggerAddFileLineColLocBreakpoint;
-    sink = (void *)&sink;
+    sink = static_cast<void *>(const_cast<void **>(&sink));
     return true;
   }();
   (void)initialized;
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 63447baa31eb..492e4781f578 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -349,7 +349,7 @@ LogicalResult DPPOp::verify() {
       return emitOpError("quad_perm attribute must have exactly 4 elements");
     }
     for (auto elem : quadPermAttr.getAsRange<IntegerAttr>()) {
-      uint32_t num = elem.getInt();
+      int32_t num = elem.getInt();
       if (num < 0 || num > 3) {
         return emitOpError(
             "Each element of quad_perm must be in the range [0, 3]");
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/TileAllocation.cpp b/mlir/lib/Dialect/ArmSME/Transforms/TileAllocation.cpp
index 3a2042d23e53..84556fbefbc9 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/TileAllocation.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/TileAllocation.cpp
@@ -137,6 +137,7 @@ static ArrayRef<TileMask> getMasks(ArmSMETileType type) {
   case ArmSMETileType::ZAQ:
     return ZA_Q_MASKS;
   }
+  llvm_unreachable("unknown type in getMasks");
 }
 
 class TileAllocator {
diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
index 8c3e25355f60..273101ce5f3e 100644
--- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
+++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
@@ -141,7 +141,6 @@ struct ParallelComputeFunctionArgs {
   BlockArgument blockSize();
   ArrayRef<BlockArgument> tripCounts();
   ArrayRef<BlockArgument> lowerBounds();
-  ArrayRef<BlockArgument> upperBounds();
   ArrayRef<BlockArgument> steps();
   ArrayRef<BlockArgument> captures();
 
@@ -175,10 +174,6 @@ ArrayRef<BlockArgument> ParallelComputeFunctionArgs::lowerBounds() {
   return args.drop_front(2 + 1 * numLoops).take_front(numLoops);
 }
 
-ArrayRef<BlockArgument> ParallelComputeFunctionArgs::upperBounds() {
-  return args.drop_front(2 + 2 * numLoops).take_front(numLoops);
-}
-
 ArrayRef<BlockArgument> ParallelComputeFunctionArgs::steps() {
   return args.drop_front(2 + 3 * numLoops).take_front(numLoops);
 }
diff --git a/mlir/lib/Dialect/Index/IR/IndexOps.cpp b/mlir/lib/Dialect/Index/IR/IndexOps.cpp
index 42401dae217c..5ad989b7da12 100644
--- a/mlir/lib/Dialect/Index/IR/IndexOps.cpp
+++ b/mlir/lib/Dialect/Index/IR/IndexOps.cpp
@@ -594,6 +594,7 @@ static bool compareSameArgs(IndexCmpPredicate pred) {
   case IndexCmpPredicate::ULT:
     return false;
   }
+  llvm_unreachable("unknown predicate in compareSameArgs");
 }
 
 OpFoldResult CmpOp::fold(FoldAdaptor adaptor) {
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index ad72b5d7becc..1f1d8ad89ae2 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -3558,7 +3558,7 @@ transform::HoistRedundantVectorTransfersOp::applyToOne(
   // WARNING: This hoisting does not model parallelism and is generally
   // incorrect when used on distributed loops with memref semantics!
   // TODO: obsolete and should be retired.
-  linalg::hoistRedundantVectorTransfers(target);
+  linalg::hoistRedundantVectorTransfers(target, getVerifyNonZeroTrip());
   results.push_back(target);
   return DiagnosedSilenceableFailure::success();
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 94f6b6029875..acfd9683f01f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -199,7 +199,8 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
   return true;
 }
 
-void mlir::linalg::hoistRedundantVectorTransfers(Operation *root) {
+void mlir::linalg::hoistRedundantVectorTransfers(Operation *root,
+                                                 bool verifyNonZeroTrip) {
   bool changed = true;
   while (changed) {
     changed = false;
@@ -208,6 +209,43 @@ void mlir::linalg::hoistRedundantVectorTransfers(Operation *root) {
     root->walk(
         [&](LoopLikeOpInterface loopLike) { moveLoopInvariantCode(loopLike); });
 
+    // Find all loops that are certain to have non zero trip count. Any loops
+    // that are not part of this set cannot be hoisted from, since hoisting from
+    // a potentially zero trip count loop may cause a vector transfer to be
+    // executed when it shouldn't be.
+    llvm::DenseSet<LoopLikeOpInterface> definiteNonZeroTripCountLoops;
+    if (verifyNonZeroTrip) {
+      root->walk([&](LoopLikeOpInterface loopLike) {
+        std::optional<SmallVector<OpFoldResult>> lbs =
+            loopLike.getLoopLowerBounds();
+        std::optional<SmallVector<OpFoldResult>> ubs =
+            loopLike.getLoopUpperBounds();
+        // If loop bounds cannot be found, assume possibly zero trip count.
+        if (!lbs || !ubs)
+          return;
+
+        // Otherwise, use ValueBounds to find the maximum lower bound and
+        // minimum upper bound. If the bounds are found, and maxLb is less
+        // than the minUb, then the loop will not have zero trip count.
+        for (auto [lb, ub] : llvm::zip_equal(lbs.value(), ubs.value())) {
+          FailureOr<int64_t> maxLb =
+              ValueBoundsConstraintSet::computeConstantBound(
+                  presburger::BoundType::UB, lb,
+                  /*stopCondition=*/nullptr, /*closedUB=*/true);
+          if (failed(maxLb))
+            return;
+          FailureOr<int64_t> minUb =
+              ValueBoundsConstraintSet::computeConstantBound(
+                  presburger::BoundType::LB, ub);
+          if (failed(minUb))
+            return;
+          if (minUb.value() <= maxLb.value())
+            return;
+          definiteNonZeroTripCountLoops.insert(loopLike);
+        }
+      });
+    }
+
     root->walk([&](vector::TransferReadOp transferRead) {
       if (!isa<MemRefType>(transferRead.getShapedType()))
         return WalkResult::advance();
@@ -220,6 +258,12 @@ void mlir::linalg::hoistRedundantVectorTransfers(Operation *root) {
       if (!isa_and_nonnull<scf::ForOp, affine::AffineForOp>(loop))
         return WalkResult::advance();
 
+      if (verifyNonZeroTrip && !definiteNonZeroTripCountLoops.contains(loop)) {
+        LLVM_DEBUG(DBGS() << "Loop may have zero trip count: " << *loop
+                          << "\n");
+        return WalkResult::advance();
+      }
+
       LLVM_DEBUG(DBGS() << "Candidate read: " << *transferRead.getOperation()
                         << "\n");
 
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 87464ccb7172..c2b8614148bf 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/Transforms/SubsetInsertionOpInterfaceImpl.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/IR/BuiltinTypeInterfaces.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"
 
@@ -636,6 +637,28 @@ struct InsertOpInterface
   }
 };
 
+template <typename InsertOpTy>
+static bool insertSliceOpRequiresRead(InsertOpTy insertSliceOp,
+                                      OpOperand &opOperand) {
+  // The source is always read.
+  if (opOperand == insertSliceOp.getSourceMutable())
+    return true;
+
+  // For the destination, it depends...
+  assert(opOperand == insertSliceOp.getDestMutable() && "expected dest");
+
+  // Dest is not read if it is entirely overwritten. E.g.:
+  // tensor.insert_slice %a into %t[0][10][1] : ... into tensor<10xf32>
+  bool allOffsetsZero =
+      llvm::all_of(insertSliceOp.getMixedOffsets(), isZeroIndex);
+  RankedTensorType destType = insertSliceOp.getDestType();
+  bool sizesMatchDestSizes =
+      areConstantIntValues(insertSliceOp.getMixedSizes(), destType.getShape());
+  bool allStridesOne =
+      areAllConstantIntValue(insertSliceOp.getMixedStrides(), 1);
+  return !(allOffsetsZero && sizesMatchDestSizes && allStridesOne);
+}
+
 /// Bufferization of tensor.insert_slice. Replace with a memory copy. Under
 /// certain circumstances, this op can also be a no-op.
 ///
@@ -646,32 +669,8 @@ struct InsertSliceOpInterface
                                                      tensor::InsertSliceOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
-    auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
-    RankedTensorType destType = insertSliceOp.getDestType();
-
-    // The source is always read.
-    if (opOperand == insertSliceOp.getSourceMutable())
-      return true;
-
-    // For the destination, it depends...
-    assert(opOperand == insertSliceOp.getDestMutable() && "expected dest");
-
-    // Dest is not read if it is entirely overwritten. E.g.:
-    // tensor.insert_slice %a into %t[0][10][1] : ... into tensor<10xf32>
-    bool allOffsetsZero =
-        llvm::all_of(insertSliceOp.getMixedOffsets(), [](OpFoldResult ofr) {
-          return isConstantIntValue(ofr, 0);
-        });
-    bool sizesMatchDestSizes = llvm::all_of(
-        llvm::enumerate(insertSliceOp.getMixedSizes()), [&](const auto &it) {
-          return getConstantIntValue(it.value()) ==
-                 destType.getDimSize(it.index());
-        });
-    bool allStridesOne =
-        llvm::all_of(insertSliceOp.getMixedStrides(), [](OpFoldResult ofr) {
-          return isConstantIntValue(ofr, 1);
-        });
-    return !(allOffsetsZero && sizesMatchDestSizes && allStridesOne);
+    return insertSliceOpRequiresRead(cast<tensor::InsertSliceOp>(op),
+                                     opOperand);
   }
 
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
@@ -931,7 +930,8 @@ struct ParallelInsertSliceOpInterface
 
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
-    return true;
+    return insertSliceOpRequiresRead(cast<tensor::ParallelInsertSliceOp>(op),
+                                     opOperand);
   }
 
   bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
index 995486c87771..3566714c6529 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
@@ -16,11 +16,6 @@ namespace mlir {
 namespace tensor {
 namespace {
 
-static bool areAllConstantIntValue(ArrayRef<OpFoldResult> ofrs, int64_t value) {
-  return llvm::all_of(
-      ofrs, [&](OpFoldResult ofr) { return isConstantIntValue(ofr, value); });
-}
-
 /// Returns the number of shape sizes that is either dynamic or greater than 1.
 static int64_t getNumGtOneDims(ArrayRef<int64_t> shape) {
   return llvm::count_if(
diff --git a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
index 547d120404ab..3eb6215a7a0b 100644
--- a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
+++ b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp
@@ -10,6 +10,7 @@
 #include "mlir/IR/Matchers.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/MathExtras.h"
 
 namespace mlir {
@@ -131,12 +132,24 @@ getConstantIntValues(ArrayRef<OpFoldResult> ofrs) {
   return res;
 }
 
-/// Return true if `ofr` is constant integer equal to `value`.
 bool isConstantIntValue(OpFoldResult ofr, int64_t value) {
   auto val = getConstantIntValue(ofr);
   return val && *val == value;
 }
 
+bool areAllConstantIntValue(ArrayRef<OpFoldResult> ofrs, int64_t value) {
+  return llvm::all_of(
+      ofrs, [&](OpFoldResult ofr) { return isConstantIntValue(ofr, value); });
+}
+
+bool areConstantIntValues(ArrayRef<OpFoldResult> ofrs,
+                          ArrayRef<int64_t> values) {
+  if (ofrs.size() != values.size())
+    return false;
+  std::optional<SmallVector<int64_t>> constOfrs = getConstantIntValues(ofrs);
+  return constOfrs && llvm::equal(constOfrs.value(), values);
+}
+
 /// Return true if ofr1 and ofr2 are the same integer constant attribute values
 /// or the same SSA value.
 /// Ignore integer bitwidth and type mismatch that come from the fact there is
diff --git a/mlir/lib/TableGen/AttrOrTypeDef.cpp b/mlir/lib/TableGen/AttrOrTypeDef.cpp
index e72ca155bcf7..9e8f789d71b5 100644
--- a/mlir/lib/TableGen/AttrOrTypeDef.cpp
+++ b/mlir/lib/TableGen/AttrOrTypeDef.cpp
@@ -17,6 +17,12 @@
 
 using namespace mlir;
 using namespace mlir::tblgen;
+using llvm::DefInit;
+using llvm::Init;
+using llvm::ListInit;
+using llvm::Record;
+using llvm::RecordVal;
+using llvm::StringInit;
 
 //===----------------------------------------------------------------------===//
 // AttrOrTypeBuilder
@@ -35,14 +41,13 @@ bool AttrOrTypeBuilder::hasInferredContextParameter() const {
 // AttrOrTypeDef
 //===----------------------------------------------------------------------===//
 
-AttrOrTypeDef::AttrOrTypeDef(const llvm::Record *def) : def(def) {
+AttrOrTypeDef::AttrOrTypeDef(const Record *def) : def(def) {
   // Populate the builders.
-  auto *builderList =
-      dyn_cast_or_null<llvm::ListInit>(def->getValueInit("builders"));
+  const auto *builderList =
+      dyn_cast_or_null<ListInit>(def->getValueInit("builders"));
   if (builderList && !builderList->empty()) {
-    for (const llvm::Init *init : builderList->getValues()) {
-      AttrOrTypeBuilder builder(cast<llvm::DefInit>(init)->getDef(),
-                                def->getLoc());
+    for (const Init *init : builderList->getValues()) {
+      AttrOrTypeBuilder builder(cast<DefInit>(init)->getDef(), def->getLoc());
 
       // Ensure that all parameters have names.
       for (const AttrOrTypeBuilder::Parameter &param :
@@ -56,16 +61,16 @@ AttrOrTypeDef::AttrOrTypeDef(const llvm::Record *def) : def(def) {
 
   // Populate the traits.
   if (auto *traitList = def->getValueAsListInit("traits")) {
-    SmallPtrSet<const llvm::Init *, 32> traitSet;
+    SmallPtrSet<const Init *, 32> traitSet;
     traits.reserve(traitSet.size());
-    llvm::unique_function<void(const llvm::ListInit *)> processTraitList =
-        [&](const llvm::ListInit *traitList) {
+    llvm::unique_function<void(const ListInit *)> processTraitList =
+        [&](const ListInit *traitList) {
           for (auto *traitInit : *traitList) {
             if (!traitSet.insert(traitInit).second)
               continue;
 
             // If this is an interface, add any bases to the trait list.
-            auto *traitDef = cast<llvm::DefInit>(traitInit)->getDef();
+            auto *traitDef = cast<DefInit>(traitInit)->getDef();
             if (traitDef->isSubClassOf("Interface")) {
               if (auto *bases = traitDef->getValueAsListInit("baseInterfaces"))
                 processTraitList(bases);
@@ -111,7 +116,7 @@ AttrOrTypeDef::AttrOrTypeDef(const llvm::Record *def) : def(def) {
 }
 
 Dialect AttrOrTypeDef::getDialect() const {
-  auto *dialect = dyn_cast<llvm::DefInit>(def->getValue("dialect")->getValue());
+  const auto *dialect = dyn_cast<DefInit>(def->getValue("dialect")->getValue());
   return Dialect(dialect ? dialect->getDef() : nullptr);
 }
 
@@ -126,8 +131,8 @@ StringRef AttrOrTypeDef::getCppBaseClassName() const {
 }
 
 bool AttrOrTypeDef::hasDescription() const {
-  const llvm::RecordVal *desc = def->getValue("description");
-  return desc && isa<llvm::StringInit>(desc->getValue());
+  const RecordVal *desc = def->getValue("description");
+  return desc && isa<StringInit>(desc->getValue());
 }
 
 StringRef AttrOrTypeDef::getDescription() const {
@@ -135,8 +140,8 @@ StringRef AttrOrTypeDef::getDescription() const {
 }
 
 bool AttrOrTypeDef::hasSummary() const {
-  const llvm::RecordVal *summary = def->getValue("summary");
-  return summary && isa<llvm::StringInit>(summary->getValue());
+  const RecordVal *summary = def->getValue("summary");
+  return summary && isa<StringInit>(summary->getValue());
 }
 
 StringRef AttrOrTypeDef::getSummary() const {
@@ -249,9 +254,9 @@ StringRef TypeDef::getTypeName() const {
 template <typename InitT>
 auto AttrOrTypeParameter::getDefValue(StringRef name) const {
   std::optional<decltype(std::declval<InitT>().getValue())> result;
-  if (auto *param = dyn_cast<llvm::DefInit>(getDef()))
-    if (auto *init = param->getDef()->getValue(name))
-      if (auto *value = dyn_cast_or_null<InitT>(init->getValue()))
+  if (const auto *param = dyn_cast<DefInit>(getDef()))
+    if (const auto *init = param->getDef()->getValue(name))
+      if (const auto *value = dyn_cast_or_null<InitT>(init->getValue()))
         result = value->getValue();
   return result;
 }
@@ -270,20 +275,20 @@ std::string AttrOrTypeParameter::getAccessorName() const {
 }
 
 std::optional<StringRef> AttrOrTypeParameter::getAllocator() const {
-  return getDefValue<llvm::StringInit>("allocator");
+  return getDefValue<StringInit>("allocator");
 }
 
 StringRef AttrOrTypeParameter::getComparator() const {
-  return getDefValue<llvm::StringInit>("comparator").value_or("$_lhs == $_rhs");
+  return getDefValue<StringInit>("comparator").value_or("$_lhs == $_rhs");
 }
 
 StringRef AttrOrTypeParameter::getCppType() const {
-  if (auto *stringType = dyn_cast<llvm::StringInit>(getDef()))
+  if (auto *stringType = dyn_cast<StringInit>(getDef()))
     return stringType->getValue();
-  auto cppType = getDefValue<llvm::StringInit>("cppType");
+  auto cppType = getDefValue<StringInit>("cppType");
   if (cppType)
     return *cppType;
-  if (auto *init = dyn_cast<llvm::DefInit>(getDef()))
+  if (const auto *init = dyn_cast<DefInit>(getDef()))
     llvm::PrintFatalError(
         init->getDef()->getLoc(),
         Twine("Missing `cppType` field in Attribute/Type parameter: ") +
@@ -295,34 +300,33 @@ StringRef AttrOrTypeParameter::getCppType() const {
 }
 
 StringRef AttrOrTypeParameter::getCppAccessorType() const {
-  return getDefValue<llvm::StringInit>("cppAccessorType")
-      .value_or(getCppType());
+  return getDefValue<StringInit>("cppAccessorType").value_or(getCppType());
 }
 
 StringRef AttrOrTypeParameter::getCppStorageType() const {
-  return getDefValue<llvm::StringInit>("cppStorageType").value_or(getCppType());
+  return getDefValue<StringInit>("cppStorageType").value_or(getCppType());
 }
 
 StringRef AttrOrTypeParameter::getConvertFromStorage() const {
-  return getDefValue<llvm::StringInit>("convertFromStorage").value_or("$_self");
+  return getDefValue<StringInit>("convertFromStorage").value_or("$_self");
 }
 
 std::optional<StringRef> AttrOrTypeParameter::getParser() const {
-  return getDefValue<llvm::StringInit>("parser");
+  return getDefValue<StringInit>("parser");
 }
 
 std::optional<StringRef> AttrOrTypeParameter::getPrinter() const {
-  return getDefValue<llvm::StringInit>("printer");
+  return getDefValue<StringInit>("printer");
 }
 
 std::optional<StringRef> AttrOrTypeParameter::getSummary() const {
-  return getDefValue<llvm::StringInit>("summary");
+  return getDefValue<StringInit>("summary");
 }
 
 StringRef AttrOrTypeParameter::getSyntax() const {
-  if (auto *stringType = dyn_cast<llvm::StringInit>(getDef()))
+  if (auto *stringType = dyn_cast<StringInit>(getDef()))
     return stringType->getValue();
-  return getDefValue<llvm::StringInit>("syntax").value_or(getCppType());
+  return getDefValue<StringInit>("syntax").value_or(getCppType());
 }
 
 bool AttrOrTypeParameter::isOptional() const {
@@ -330,17 +334,14 @@ bool AttrOrTypeParameter::isOptional() const {
 }
 
 std::optional<StringRef> AttrOrTypeParameter::getDefaultValue() const {
-  std::optional<StringRef> result =
-      getDefValue<llvm::StringInit>("defaultValue");
+  std::optional<StringRef> result = getDefValue<StringInit>("defaultValue");
   return result && !result->empty() ? result : std::nullopt;
 }
 
-const llvm::Init *AttrOrTypeParameter::getDef() const {
-  return def->getArg(index);
-}
+const Init *AttrOrTypeParameter::getDef() const { return def->getArg(index); }
 
 std::optional<Constraint> AttrOrTypeParameter::getConstraint() const {
-  if (auto *param = dyn_cast<llvm::DefInit>(getDef()))
+  if (const auto *param = dyn_cast<DefInit>(getDef()))
     if (param->getDef()->isSubClassOf("Constraint"))
       return Constraint(param->getDef());
   return std::nullopt;
@@ -351,8 +352,8 @@ std::optional<Constraint> AttrOrTypeParameter::getConstraint() const {
 //===----------------------------------------------------------------------===//
 
 bool AttributeSelfTypeParameter::classof(const AttrOrTypeParameter *param) {
-  const llvm::Init *paramDef = param->getDef();
-  if (auto *paramDefInit = dyn_cast<llvm::DefInit>(paramDef))
+  const Init *paramDef = param->getDef();
+  if (const auto *paramDefInit = dyn_cast<DefInit>(paramDef))
     return paramDefInit->getDef()->isSubClassOf("AttributeSelfTypeParameter");
   return false;
 }
diff --git a/mlir/lib/TableGen/Attribute.cpp b/mlir/lib/TableGen/Attribute.cpp
index 887553bca661..f9fc58a40f33 100644
--- a/mlir/lib/TableGen/Attribute.cpp
+++ b/mlir/lib/TableGen/Attribute.cpp
@@ -71,7 +71,7 @@ StringRef Attribute::getReturnType() const {
 // Return the type constraint corresponding to the type of this attribute, or
 // std::nullopt if this is not a TypedAttr.
 std::optional<Type> Attribute::getValueType() const {
-  if (auto *defInit = dyn_cast<llvm::DefInit>(def->getValueInit("valueType")))
+  if (const auto *defInit = dyn_cast<DefInit>(def->getValueInit("valueType")))
     return Type(defInit->getDef());
   return std::nullopt;
 }
@@ -92,8 +92,7 @@ StringRef Attribute::getConstBuilderTemplate() const {
 }
 
 Attribute Attribute::getBaseAttr() const {
-  if (const auto *defInit =
-          llvm::dyn_cast<llvm::DefInit>(def->getValueInit("baseAttr"))) {
+  if (const auto *defInit = dyn_cast<DefInit>(def->getValueInit("baseAttr"))) {
     return Attribute(defInit).getBaseAttr();
   }
   return *this;
@@ -132,7 +131,7 @@ Dialect Attribute::getDialect() const {
   return Dialect(nullptr);
 }
 
-const llvm::Record &Attribute::getDef() const { return *def; }
+const Record &Attribute::getDef() const { return *def; }
 
 ConstantAttr::ConstantAttr(const DefInit *init) : def(init->getDef()) {
   assert(def->isSubClassOf("ConstantAttr") &&
@@ -147,12 +146,12 @@ StringRef ConstantAttr::getConstantValue() const {
   return def->getValueAsString("value");
 }
 
-EnumAttrCase::EnumAttrCase(const llvm::Record *record) : Attribute(record) {
+EnumAttrCase::EnumAttrCase(const Record *record) : Attribute(record) {
   assert(isSubClassOf("EnumAttrCaseInfo") &&
          "must be subclass of TableGen 'EnumAttrInfo' class");
 }
 
-EnumAttrCase::EnumAttrCase(const llvm::DefInit *init)
+EnumAttrCase::EnumAttrCase(const DefInit *init)
     : EnumAttrCase(init->getDef()) {}
 
 StringRef EnumAttrCase::getSymbol() const {
@@ -163,16 +162,16 @@ StringRef EnumAttrCase::getStr() const { return def->getValueAsString("str"); }
 
 int64_t EnumAttrCase::getValue() const { return def->getValueAsInt("value"); }
 
-const llvm::Record &EnumAttrCase::getDef() const { return *def; }
+const Record &EnumAttrCase::getDef() const { return *def; }
 
-EnumAttr::EnumAttr(const llvm::Record *record) : Attribute(record) {
+EnumAttr::EnumAttr(const Record *record) : Attribute(record) {
   assert(isSubClassOf("EnumAttrInfo") &&
          "must be subclass of TableGen 'EnumAttr' class");
 }
 
-EnumAttr::EnumAttr(const llvm::Record &record) : Attribute(&record) {}
+EnumAttr::EnumAttr(const Record &record) : Attribute(&record) {}
 
-EnumAttr::EnumAttr(const llvm::DefInit *init) : EnumAttr(init->getDef()) {}
+EnumAttr::EnumAttr(const DefInit *init) : EnumAttr(init->getDef()) {}
 
 bool EnumAttr::classof(const Attribute *attr) {
   return attr->isSubClassOf("EnumAttrInfo");
@@ -218,8 +217,8 @@ std::vector<EnumAttrCase> EnumAttr::getAllCases() const {
   std::vector<EnumAttrCase> cases;
   cases.reserve(inits->size());
 
-  for (const llvm::Init *init : *inits) {
-    cases.emplace_back(cast<llvm::DefInit>(init));
+  for (const Init *init : *inits) {
+    cases.emplace_back(cast<DefInit>(init));
   }
 
   return cases;
@@ -229,7 +228,7 @@ bool EnumAttr::genSpecializedAttr() const {
   return def->getValueAsBit("genSpecializedAttr");
 }
 
-const llvm::Record *EnumAttr::getBaseAttrClass() const {
+const Record *EnumAttr::getBaseAttrClass() const {
   return def->getValueAsDef("baseAttrClass");
 }
 
diff --git a/mlir/lib/TableGen/Builder.cpp b/mlir/lib/TableGen/Builder.cpp
index 044765c72601..a94e1cca5fc5 100644
--- a/mlir/lib/TableGen/Builder.cpp
+++ b/mlir/lib/TableGen/Builder.cpp
@@ -12,6 +12,11 @@
 
 using namespace mlir;
 using namespace mlir::tblgen;
+using llvm::DagInit;
+using llvm::DefInit;
+using llvm::Init;
+using llvm::Record;
+using llvm::StringInit;
 
 //===----------------------------------------------------------------------===//
 // Builder::Parameter
@@ -19,9 +24,9 @@ using namespace mlir::tblgen;
 
 /// Return a string containing the C++ type of this parameter.
 StringRef Builder::Parameter::getCppType() const {
-  if (const auto *stringInit = dyn_cast<llvm::StringInit>(def))
+  if (const auto *stringInit = dyn_cast<StringInit>(def))
     return stringInit->getValue();
-  const llvm::Record *record = cast<llvm::DefInit>(def)->getDef();
+  const Record *record = cast<DefInit>(def)->getDef();
   // Inlining the first part of `Record::getValueAsString` to give better
   // error messages.
   const llvm::RecordVal *type = record->getValue("type");
@@ -35,9 +40,9 @@ StringRef Builder::Parameter::getCppType() const {
 /// Return an optional string containing the default value to use for this
 /// parameter.
 std::optional<StringRef> Builder::Parameter::getDefaultValue() const {
-  if (isa<llvm::StringInit>(def))
+  if (isa<StringInit>(def))
     return std::nullopt;
-  const llvm::Record *record = cast<llvm::DefInit>(def)->getDef();
+  const Record *record = cast<DefInit>(def)->getDef();
   std::optional<StringRef> value =
       record->getValueAsOptionalString("defaultValue");
   return value && !value->empty() ? value : std::nullopt;
@@ -47,18 +52,17 @@ std::optional<StringRef> Builder::Parameter::getDefaultValue() const {
 // Builder
 //===----------------------------------------------------------------------===//
 
-Builder::Builder(const llvm::Record *record, ArrayRef<SMLoc> loc)
-    : def(record) {
+Builder::Builder(const Record *record, ArrayRef<SMLoc> loc) : def(record) {
   // Initialize the parameters of the builder.
-  const llvm::DagInit *dag = def->getValueAsDag("dagParams");
-  auto *defInit = dyn_cast<llvm::DefInit>(dag->getOperator());
+  const DagInit *dag = def->getValueAsDag("dagParams");
+  auto *defInit = dyn_cast<DefInit>(dag->getOperator());
   if (!defInit || defInit->getDef()->getName() != "ins")
     PrintFatalError(def->getLoc(), "expected 'ins' in builders");
 
   bool seenDefaultValue = false;
   for (unsigned i = 0, e = dag->getNumArgs(); i < e; ++i) {
-    const llvm::StringInit *paramName = dag->getArgName(i);
-    const llvm::Init *paramValue = dag->getArg(i);
+    const StringInit *paramName = dag->getArgName(i);
+    const Init *paramValue = dag->getArg(i);
     Parameter param(paramName ? paramName->getValue()
                               : std::optional<StringRef>(),
                     paramValue);
diff --git a/mlir/lib/TableGen/CodeGenHelpers.cpp b/mlir/lib/TableGen/CodeGenHelpers.cpp
index 2f13887aa0bb..747af1ce5a4d 100644
--- a/mlir/lib/TableGen/CodeGenHelpers.cpp
+++ b/mlir/lib/TableGen/CodeGenHelpers.cpp
@@ -24,32 +24,32 @@ using namespace mlir::tblgen;
 
 /// Generate a unique label based on the current file name to prevent name
 /// collisions if multiple generated files are included at once.
-static std::string getUniqueOutputLabel(const llvm::RecordKeeper &records,
+static std::string getUniqueOutputLabel(const RecordKeeper &records,
                                         StringRef tag) {
   // Use the input file name when generating a unique name.
   std::string inputFilename = records.getInputFilename();
 
   // Drop all but the base filename.
-  StringRef nameRef = llvm::sys::path::filename(inputFilename);
+  StringRef nameRef = sys::path::filename(inputFilename);
   nameRef.consume_back(".td");
 
   // Sanitize any invalid characters.
   std::string uniqueName(tag);
   for (char c : nameRef) {
-    if (llvm::isAlnum(c) || c == '_')
+    if (isAlnum(c) || c == '_')
       uniqueName.push_back(c);
     else
-      uniqueName.append(llvm::utohexstr((unsigned char)c));
+      uniqueName.append(utohexstr((unsigned char)c));
   }
   return uniqueName;
 }
 
 StaticVerifierFunctionEmitter::StaticVerifierFunctionEmitter(
-    raw_ostream &os, const llvm::RecordKeeper &records, StringRef tag)
+    raw_ostream &os, const RecordKeeper &records, StringRef tag)
     : os(os), uniqueOutputLabel(getUniqueOutputLabel(records, tag)) {}
 
 void StaticVerifierFunctionEmitter::emitOpConstraints(
-    ArrayRef<const llvm::Record *> opDefs) {
+    ArrayRef<const Record *> opDefs) {
   NamespaceEmitter namespaceEmitter(os, Operator(*opDefs[0]).getCppNamespace());
   emitTypeConstraints();
   emitAttrConstraints();
@@ -58,7 +58,7 @@ void StaticVerifierFunctionEmitter::emitOpConstraints(
 }
 
 void StaticVerifierFunctionEmitter::emitPatternConstraints(
-    const llvm::ArrayRef<DagLeaf> constraints) {
+    const ArrayRef<DagLeaf> constraints) {
   collectPatternConstraints(constraints);
   emitPatternConstraints();
 }
@@ -298,7 +298,7 @@ void StaticVerifierFunctionEmitter::collectOpConstraints(
 }
 
 void StaticVerifierFunctionEmitter::collectPatternConstraints(
-    const llvm::ArrayRef<DagLeaf> constraints) {
+    const ArrayRef<DagLeaf> constraints) {
   for (auto &leaf : constraints) {
     assert(leaf.isOperandMatcher() || leaf.isAttrMatcher());
     collectConstraint(
@@ -313,7 +313,7 @@ void StaticVerifierFunctionEmitter::collectPatternConstraints(
 
 std::string mlir::tblgen::escapeString(StringRef value) {
   std::string ret;
-  llvm::raw_string_ostream os(ret);
+  raw_string_ostream os(ret);
   os.write_escaped(value);
   return ret;
 }
diff --git a/mlir/lib/TableGen/Interfaces.cpp b/mlir/lib/TableGen/Interfaces.cpp
index 4a6709a43d0a..dc9a74c4e8a9 100644
--- a/mlir/lib/TableGen/Interfaces.cpp
+++ b/mlir/lib/TableGen/Interfaces.cpp
@@ -16,17 +16,22 @@
 
 using namespace mlir;
 using namespace mlir::tblgen;
+using llvm::DagInit;
+using llvm::DefInit;
+using llvm::Init;
+using llvm::ListInit;
+using llvm::Record;
+using llvm::StringInit;
 
 //===----------------------------------------------------------------------===//
 // InterfaceMethod
 //===----------------------------------------------------------------------===//
 
-InterfaceMethod::InterfaceMethod(const llvm::Record *def) : def(def) {
-  const llvm::DagInit *args = def->getValueAsDag("arguments");
+InterfaceMethod::InterfaceMethod(const Record *def) : def(def) {
+  const DagInit *args = def->getValueAsDag("arguments");
   for (unsigned i = 0, e = args->getNumArgs(); i != e; ++i) {
-    arguments.push_back(
-        {llvm::cast<llvm::StringInit>(args->getArg(i))->getValue(),
-         args->getArgNameStr(i)});
+    arguments.push_back({cast<StringInit>(args->getArg(i))->getValue(),
+                         args->getArgNameStr(i)});
   }
 }
 
@@ -72,18 +77,17 @@ bool InterfaceMethod::arg_empty() const { return arguments.empty(); }
 // Interface
 //===----------------------------------------------------------------------===//
 
-Interface::Interface(const llvm::Record *def) : def(def) {
+Interface::Interface(const Record *def) : def(def) {
   assert(def->isSubClassOf("Interface") &&
          "must be subclass of TableGen 'Interface' class");
 
   // Initialize the interface methods.
-  auto *listInit = dyn_cast<llvm::ListInit>(def->getValueInit("methods"));
-  for (const llvm::Init *init : listInit->getValues())
-    methods.emplace_back(cast<llvm::DefInit>(init)->getDef());
+  auto *listInit = dyn_cast<ListInit>(def->getValueInit("methods"));
+  for (const Init *init : listInit->getValues())
+    methods.emplace_back(cast<DefInit>(init)->getDef());
 
   // Initialize the interface base classes.
-  auto *basesInit =
-      dyn_cast<llvm::ListInit>(def->getValueInit("baseInterfaces"));
+  auto *basesInit = dyn_cast<ListInit>(def->getValueInit("baseInterfaces"));
   // Chained inheritance will produce duplicates in the base interface set.
   StringSet<> basesAdded;
   llvm::unique_function<void(Interface)> addBaseInterfaceFn =
@@ -98,8 +102,8 @@ Interface::Interface(const llvm::Record *def) : def(def) {
         baseInterfaces.push_back(std::make_unique<Interface>(baseInterface));
         basesAdded.insert(baseInterface.getName());
       };
-  for (const llvm::Init *init : basesInit->getValues())
-    addBaseInterfaceFn(Interface(cast<llvm::DefInit>(init)->getDef()));
+  for (const Init *init : basesInit->getValues())
+    addBaseInterfaceFn(Interface(cast<DefInit>(init)->getDef()));
 }
 
 // Return the name of this interface.
diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp
index 86670e9f8712..904cc6637d53 100644
--- a/mlir/lib/TableGen/Operator.cpp
+++ b/mlir/lib/TableGen/Operator.cpp
@@ -35,9 +35,12 @@ using namespace mlir::tblgen;
 
 using llvm::DagInit;
 using llvm::DefInit;
+using llvm::Init;
+using llvm::ListInit;
 using llvm::Record;
+using llvm::StringInit;
 
-Operator::Operator(const llvm::Record &def)
+Operator::Operator(const Record &def)
     : dialect(def.getValueAsDef("opDialect")), def(def) {
   // The first `_` in the op's TableGen def name is treated as separating the
   // dialect prefix and the op class name. The dialect prefix will be ignored if
@@ -179,7 +182,7 @@ StringRef Operator::getExtraClassDefinition() const {
   return def.getValueAsString(attr);
 }
 
-const llvm::Record &Operator::getDef() const { return def; }
+const Record &Operator::getDef() const { return def; }
 
 bool Operator::skipDefaultBuilders() const {
   return def.getValueAsBit("skipDefaultBuilders");
@@ -429,7 +432,7 @@ void Operator::populateTypeInferenceInfo(
   // Use `AllTypesMatch` and `TypesMatchWith` operation traits to build the
   // result type inference graph.
   for (const Trait &trait : traits) {
-    const llvm::Record &def = trait.getDef();
+    const Record &def = trait.getDef();
 
     // If the infer type op interface was manually added, then treat it as
     // intention that the op needs special handling.
@@ -614,9 +617,8 @@ void Operator::populateOpStructure() {
             def.getLoc(),
             "unsupported attribute modelling, only single class expected");
       }
-      attributes.push_back(
-          {cast<llvm::StringInit>(val.getNameInit())->getValue(),
-           Attribute(cast<DefInit>(val.getValue()))});
+      attributes.push_back({cast<StringInit>(val.getNameInit())->getValue(),
+                            Attribute(cast<DefInit>(val.getValue()))});
     }
   }
 
@@ -701,7 +703,7 @@ void Operator::populateOpStructure() {
   // tablegen is easy, making them unique less so, so dedupe here.
   if (auto *traitList = def.getValueAsListInit("traits")) {
     // This is uniquing based on pointers of the trait.
-    SmallPtrSet<const llvm::Init *, 32> traitSet;
+    SmallPtrSet<const Init *, 32> traitSet;
     traits.reserve(traitSet.size());
 
     // The declaration order of traits imply the verification order of traits.
@@ -721,8 +723,8 @@ void Operator::populateOpStructure() {
                   " to precede it in traits list");
     };
 
-    std::function<void(const llvm::ListInit *)> insert;
-    insert = [&](const llvm::ListInit *traitList) {
+    std::function<void(const ListInit *)> insert;
+    insert = [&](const ListInit *traitList) {
       for (auto *traitInit : *traitList) {
         auto *def = cast<DefInit>(traitInit)->getDef();
         if (def->isSubClassOf("TraitList")) {
@@ -777,11 +779,10 @@ void Operator::populateOpStructure() {
   }
 
   // Populate the builders.
-  auto *builderList =
-      dyn_cast_or_null<llvm::ListInit>(def.getValueInit("builders"));
+  auto *builderList = dyn_cast_or_null<ListInit>(def.getValueInit("builders"));
   if (builderList && !builderList->empty()) {
-    for (const llvm::Init *init : builderList->getValues())
-      builders.emplace_back(cast<llvm::DefInit>(init)->getDef(), def.getLoc());
+    for (const Init *init : builderList->getValues())
+      builders.emplace_back(cast<DefInit>(init)->getDef(), def.getLoc());
   } else if (skipDefaultBuilders()) {
     PrintFatalError(
         def.getLoc(),
@@ -814,13 +815,12 @@ StringRef Operator::getSummary() const {
 
 bool Operator::hasAssemblyFormat() const {
   auto *valueInit = def.getValueInit("assemblyFormat");
-  return isa<llvm::StringInit>(valueInit);
+  return isa<StringInit>(valueInit);
 }
 
 StringRef Operator::getAssemblyFormat() const {
-  return TypeSwitch<const llvm::Init *, StringRef>(
-             def.getValueInit("assemblyFormat"))
-      .Case<llvm::StringInit>([&](auto *init) { return init->getValue(); });
+  return TypeSwitch<const Init *, StringRef>(def.getValueInit("assemblyFormat"))
+      .Case<StringInit>([&](auto *init) { return init->getValue(); });
 }
 
 void Operator::print(llvm::raw_ostream &os) const {
@@ -833,9 +833,9 @@ void Operator::print(llvm::raw_ostream &os) const {
   }
 }
 
-auto Operator::VariableDecoratorIterator::unwrap(const llvm::Init *init)
+auto Operator::VariableDecoratorIterator::unwrap(const Init *init)
     -> VariableDecorator {
-  return VariableDecorator(cast<llvm::DefInit>(init)->getDef());
+  return VariableDecorator(cast<DefInit>(init)->getDef());
 }
 
 auto Operator::getArgToOperandOrAttribute(int index) const
diff --git a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp
index bee20354387f..ffa0c067b028 100644
--- a/mlir/lib/TableGen/Pattern.cpp
+++ b/mlir/lib/TableGen/Pattern.cpp
@@ -26,7 +26,12 @@
 using namespace mlir;
 using namespace tblgen;
 
+using llvm::DagInit;
+using llvm::dbgs;
+using llvm::DefInit;
 using llvm::formatv;
+using llvm::IntInit;
+using llvm::Record;
 
 //===----------------------------------------------------------------------===//
 // DagLeaf
@@ -61,31 +66,31 @@ bool DagLeaf::isStringAttr() const { return isa<llvm::StringInit>(def); }
 Constraint DagLeaf::getAsConstraint() const {
   assert((isOperandMatcher() || isAttrMatcher()) &&
          "the DAG leaf must be operand or attribute");
-  return Constraint(cast<llvm::DefInit>(def)->getDef());
+  return Constraint(cast<DefInit>(def)->getDef());
 }
 
 ConstantAttr DagLeaf::getAsConstantAttr() const {
   assert(isConstantAttr() && "the DAG leaf must be constant attribute");
-  return ConstantAttr(cast<llvm::DefInit>(def));
+  return ConstantAttr(cast<DefInit>(def));
 }
 
 EnumAttrCase DagLeaf::getAsEnumAttrCase() const {
   assert(isEnumAttrCase() && "the DAG leaf must be an enum attribute case");
-  return EnumAttrCase(cast<llvm::DefInit>(def));
+  return EnumAttrCase(cast<DefInit>(def));
 }
 
 std::string DagLeaf::getConditionTemplate() const {
   return getAsConstraint().getConditionTemplate();
 }
 
-llvm::StringRef DagLeaf::getNativeCodeTemplate() const {
+StringRef DagLeaf::getNativeCodeTemplate() const {
   assert(isNativeCodeCall() && "the DAG leaf must be NativeCodeCall");
-  return cast<llvm::DefInit>(def)->getDef()->getValueAsString("expression");
+  return cast<DefInit>(def)->getDef()->getValueAsString("expression");
 }
 
 int DagLeaf::getNumReturnsOfNativeCode() const {
   assert(isNativeCodeCall() && "the DAG leaf must be NativeCodeCall");
-  return cast<llvm::DefInit>(def)->getDef()->getValueAsInt("numReturns");
+  return cast<DefInit>(def)->getDef()->getValueAsInt("numReturns");
 }
 
 std::string DagLeaf::getStringAttr() const {
@@ -93,7 +98,7 @@ std::string DagLeaf::getStringAttr() const {
   return def->getAsUnquotedString();
 }
 bool DagLeaf::isSubClassOf(StringRef superclass) const {
-  if (auto *defInit = dyn_cast_or_null<llvm::DefInit>(def))
+  if (auto *defInit = dyn_cast_or_null<DefInit>(def))
     return defInit->getDef()->isSubClassOf(superclass);
   return false;
 }
@@ -108,7 +113,7 @@ void DagLeaf::print(raw_ostream &os) const {
 //===----------------------------------------------------------------------===//
 
 bool DagNode::isNativeCodeCall() const {
-  if (auto *defInit = dyn_cast_or_null<llvm::DefInit>(node->getOperator()))
+  if (auto *defInit = dyn_cast_or_null<DefInit>(node->getOperator()))
     return defInit->getDef()->isSubClassOf("NativeCodeCall");
   return false;
 }
@@ -119,25 +124,24 @@ bool DagNode::isOperation() const {
          !isVariadic();
 }
 
-llvm::StringRef DagNode::getNativeCodeTemplate() const {
+StringRef DagNode::getNativeCodeTemplate() const {
   assert(isNativeCodeCall() && "the DAG leaf must be NativeCodeCall");
-  return cast<llvm::DefInit>(node->getOperator())
+  return cast<DefInit>(node->getOperator())
       ->getDef()
       ->getValueAsString("expression");
 }
 
 int DagNode::getNumReturnsOfNativeCode() const {
   assert(isNativeCodeCall() && "the DAG leaf must be NativeCodeCall");
-  return cast<llvm::DefInit>(node->getOperator())
+  return cast<DefInit>(node->getOperator())
       ->getDef()
       ->getValueAsInt("numReturns");
 }
 
-llvm::StringRef DagNode::getSymbol() const { return node->getNameStr(); }
+StringRef DagNode::getSymbol() const { return node->getNameStr(); }
 
 Operator &DagNode::getDialectOp(RecordOperatorMap *mapper) const {
-  const llvm::Record *opDef =
-      cast<llvm::DefInit>(node->getOperator())->getDef();
+  const Record *opDef = cast<DefInit>(node->getOperator())->getDef();
   auto [it, inserted] = mapper->try_emplace(opDef);
   if (inserted)
     it->second = std::make_unique<Operator>(opDef);
@@ -158,11 +162,11 @@ int DagNode::getNumOps() const {
 int DagNode::getNumArgs() const { return node->getNumArgs(); }
 
 bool DagNode::isNestedDagArg(unsigned index) const {
-  return isa<llvm::DagInit>(node->getArg(index));
+  return isa<DagInit>(node->getArg(index));
 }
 
 DagNode DagNode::getArgAsNestedDag(unsigned index) const {
-  return DagNode(dyn_cast_or_null<llvm::DagInit>(node->getArg(index)));
+  return DagNode(dyn_cast_or_null<DagInit>(node->getArg(index)));
 }
 
 DagLeaf DagNode::getArgAsLeaf(unsigned index) const {
@@ -175,27 +179,27 @@ StringRef DagNode::getArgName(unsigned index) const {
 }
 
 bool DagNode::isReplaceWithValue() const {
-  auto *dagOpDef = cast<llvm::DefInit>(node->getOperator())->getDef();
+  auto *dagOpDef = cast<DefInit>(node->getOperator())->getDef();
   return dagOpDef->getName() == "replaceWithValue";
 }
 
 bool DagNode::isLocationDirective() const {
-  auto *dagOpDef = cast<llvm::DefInit>(node->getOperator())->getDef();
+  auto *dagOpDef = cast<DefInit>(node->getOperator())->getDef();
   return dagOpDef->getName() == "location";
 }
 
 bool DagNode::isReturnTypeDirective() const {
-  auto *dagOpDef = cast<llvm::DefInit>(node->getOperator())->getDef();
+  auto *dagOpDef = cast<DefInit>(node->getOperator())->getDef();
   return dagOpDef->getName() == "returnType";
 }
 
 bool DagNode::isEither() const {
-  auto *dagOpDef = cast<llvm::DefInit>(node->getOperator())->getDef();
+  auto *dagOpDef = cast<DefInit>(node->getOperator())->getDef();
   return dagOpDef->getName() == "either";
 }
 
 bool DagNode::isVariadic() const {
-  auto *dagOpDef = cast<llvm::DefInit>(node->getOperator())->getDef();
+  auto *dagOpDef = cast<DefInit>(node->getOperator())->getDef();
   return dagOpDef->getName() == "variadic";
 }
 
@@ -246,7 +250,7 @@ std::string SymbolInfoMap::SymbolInfo::getVarName(StringRef name) const {
 }
 
 std::string SymbolInfoMap::SymbolInfo::getVarTypeStr(StringRef name) const {
-  LLVM_DEBUG(llvm::dbgs() << "getVarTypeStr for '" << name << "': ");
+  LLVM_DEBUG(dbgs() << "getVarTypeStr for '" << name << "': ");
   switch (kind) {
   case Kind::Attr: {
     if (op)
@@ -277,26 +281,26 @@ std::string SymbolInfoMap::SymbolInfo::getVarTypeStr(StringRef name) const {
 }
 
 std::string SymbolInfoMap::SymbolInfo::getVarDecl(StringRef name) const {
-  LLVM_DEBUG(llvm::dbgs() << "getVarDecl for '" << name << "': ");
+  LLVM_DEBUG(dbgs() << "getVarDecl for '" << name << "': ");
   std::string varInit = kind == Kind::Operand ? "(op0->getOperands())" : "";
   return std::string(
       formatv("{0} {1}{2};\n", getVarTypeStr(name), getVarName(name), varInit));
 }
 
 std::string SymbolInfoMap::SymbolInfo::getArgDecl(StringRef name) const {
-  LLVM_DEBUG(llvm::dbgs() << "getArgDecl for '" << name << "': ");
+  LLVM_DEBUG(dbgs() << "getArgDecl for '" << name << "': ");
   return std::string(
       formatv("{0} &{1}", getVarTypeStr(name), getVarName(name)));
 }
 
 std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
     StringRef name, int index, const char *fmt, const char *separator) const {
-  LLVM_DEBUG(llvm::dbgs() << "getValueAndRangeUse for '" << name << "': ");
+  LLVM_DEBUG(dbgs() << "getValueAndRangeUse for '" << name << "': ");
   switch (kind) {
   case Kind::Attr: {
     assert(index < 0);
     auto repl = formatv(fmt, name);
-    LLVM_DEBUG(llvm::dbgs() << repl << " (Attr)\n");
+    LLVM_DEBUG(dbgs() << repl << " (Attr)\n");
     return std::string(repl);
   }
   case Kind::Operand: {
@@ -307,11 +311,11 @@ std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
     // the value itself.
     if (operand->isVariableLength() && !getVariadicSubIndex().has_value()) {
       auto repl = formatv(fmt, name);
-      LLVM_DEBUG(llvm::dbgs() << repl << " (VariadicOperand)\n");
+      LLVM_DEBUG(dbgs() << repl << " (VariadicOperand)\n");
       return std::string(repl);
     }
     auto repl = formatv(fmt, formatv("(*{0}.begin())", name));
-    LLVM_DEBUG(llvm::dbgs() << repl << " (SingleOperand)\n");
+    LLVM_DEBUG(dbgs() << repl << " (SingleOperand)\n");
     return std::string(repl);
   }
   case Kind::Result: {
@@ -323,14 +327,14 @@ std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
       if (!op->getResult(index).isVariadic())
         v = std::string(formatv("(*{0}.begin())", v));
       auto repl = formatv(fmt, v);
-      LLVM_DEBUG(llvm::dbgs() << repl << " (SingleResult)\n");
+      LLVM_DEBUG(dbgs() << repl << " (SingleResult)\n");
       return std::string(repl);
     }
 
     // If this op has no result at all but still we bind a symbol to it, it
     // means we want to capture the op itself.
     if (op->getNumResults() == 0) {
-      LLVM_DEBUG(llvm::dbgs() << name << " (Op)\n");
+      LLVM_DEBUG(dbgs() << name << " (Op)\n");
       return formatv(fmt, name);
     }
 
@@ -347,14 +351,14 @@ std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
       values.push_back(std::string(formatv(fmt, v)));
     }
     auto repl = llvm::join(values, separator);
-    LLVM_DEBUG(llvm::dbgs() << repl << " (VariadicResult)\n");
+    LLVM_DEBUG(dbgs() << repl << " (VariadicResult)\n");
     return repl;
   }
   case Kind::Value: {
     assert(index < 0);
     assert(op == nullptr);
     auto repl = formatv(fmt, name);
-    LLVM_DEBUG(llvm::dbgs() << repl << " (Value)\n");
+    LLVM_DEBUG(dbgs() << repl << " (Value)\n");
     return std::string(repl);
   }
   case Kind::MultipleValues: {
@@ -363,13 +367,13 @@ std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
     if (index >= 0) {
       std::string repl =
           formatv(fmt, std::string(formatv("{0}[{1}]", name, index)));
-      LLVM_DEBUG(llvm::dbgs() << repl << " (MultipleValues)\n");
+      LLVM_DEBUG(dbgs() << repl << " (MultipleValues)\n");
       return repl;
     }
     // If it doesn't specify certain element, unpack them all.
     auto repl =
         formatv(fmt, std::string(formatv("{0}.begin(), {0}.end()", name)));
-    LLVM_DEBUG(llvm::dbgs() << repl << " (MultipleValues)\n");
+    LLVM_DEBUG(dbgs() << repl << " (MultipleValues)\n");
     return std::string(repl);
   }
   }
@@ -378,19 +382,19 @@ std::string SymbolInfoMap::SymbolInfo::getValueAndRangeUse(
 
 std::string SymbolInfoMap::SymbolInfo::getAllRangeUse(
     StringRef name, int index, const char *fmt, const char *separator) const {
-  LLVM_DEBUG(llvm::dbgs() << "getAllRangeUse for '" << name << "': ");
+  LLVM_DEBUG(dbgs() << "getAllRangeUse for '" << name << "': ");
   switch (kind) {
   case Kind::Attr:
   case Kind::Operand: {
     assert(index < 0 && "only allowed for symbol bound to result");
     auto repl = formatv(fmt, name);
-    LLVM_DEBUG(llvm::dbgs() << repl << " (Operand/Attr)\n");
+    LLVM_DEBUG(dbgs() << repl << " (Operand/Attr)\n");
     return std::string(repl);
   }
   case Kind::Result: {
     if (index >= 0) {
       auto repl = formatv(fmt, formatv("{0}.getODSResults({1})", name, index));
-      LLVM_DEBUG(llvm::dbgs() << repl << " (SingleResult)\n");
+      LLVM_DEBUG(dbgs() << repl << " (SingleResult)\n");
       return std::string(repl);
     }
 
@@ -404,14 +408,14 @@ std::string SymbolInfoMap::SymbolInfo::getAllRangeUse(
           formatv(fmt, formatv("{0}.getODSResults({1})", name, i))));
     }
     auto repl = llvm::join(values, separator);
-    LLVM_DEBUG(llvm::dbgs() << repl << " (VariadicResult)\n");
+    LLVM_DEBUG(dbgs() << repl << " (VariadicResult)\n");
     return repl;
   }
   case Kind::Value: {
     assert(index < 0 && "only allowed for symbol bound to result");
     assert(op == nullptr);
     auto repl = formatv(fmt, formatv("{{{0}}", name));
-    LLVM_DEBUG(llvm::dbgs() << repl << " (Value)\n");
+    LLVM_DEBUG(dbgs() << repl << " (Value)\n");
     return std::string(repl);
   }
   case Kind::MultipleValues: {
@@ -420,12 +424,12 @@ std::string SymbolInfoMap::SymbolInfo::getAllRangeUse(
     if (index >= 0) {
       std::string repl =
           formatv(fmt, std::string(formatv("{0}[{1}]", name, index)));
-      LLVM_DEBUG(llvm::dbgs() << repl << " (MultipleValues)\n");
+      LLVM_DEBUG(dbgs() << repl << " (MultipleValues)\n");
       return repl;
     }
     auto repl =
         formatv(fmt, std::string(formatv("{0}.begin(), {0}.end()", name)));
-    LLVM_DEBUG(llvm::dbgs() << repl << " (MultipleValues)\n");
+    LLVM_DEBUG(dbgs() << repl << " (MultipleValues)\n");
     return std::string(repl);
   }
   }
@@ -614,7 +618,7 @@ void SymbolInfoMap::assignUniqueAlternativeNames() {
 // Pattern
 //==----------------------------------------------------------------------===//
 
-Pattern::Pattern(const llvm::Record *def, RecordOperatorMap *mapper)
+Pattern::Pattern(const Record *def, RecordOperatorMap *mapper)
     : def(*def), recordOpMap(mapper) {}
 
 DagNode Pattern::getSourcePattern() const {
@@ -628,26 +632,26 @@ int Pattern::getNumResultPatterns() const {
 
 DagNode Pattern::getResultPattern(unsigned index) const {
   auto *results = def.getValueAsListInit("resultPatterns");
-  return DagNode(cast<llvm::DagInit>(results->getElement(index)));
+  return DagNode(cast<DagInit>(results->getElement(index)));
 }
 
 void Pattern::collectSourcePatternBoundSymbols(SymbolInfoMap &infoMap) {
-  LLVM_DEBUG(llvm::dbgs() << "start collecting source pattern bound symbols\n");
+  LLVM_DEBUG(dbgs() << "start collecting source pattern bound symbols\n");
   collectBoundSymbols(getSourcePattern(), infoMap, /*isSrcPattern=*/true);
-  LLVM_DEBUG(llvm::dbgs() << "done collecting source pattern bound symbols\n");
+  LLVM_DEBUG(dbgs() << "done collecting source pattern bound symbols\n");
 
-  LLVM_DEBUG(llvm::dbgs() << "start assigning alternative names for symbols\n");
+  LLVM_DEBUG(dbgs() << "start assigning alternative names for symbols\n");
   infoMap.assignUniqueAlternativeNames();
-  LLVM_DEBUG(llvm::dbgs() << "done assigning alternative names for symbols\n");
+  LLVM_DEBUG(dbgs() << "done assigning alternative names for symbols\n");
 }
 
 void Pattern::collectResultPatternBoundSymbols(SymbolInfoMap &infoMap) {
-  LLVM_DEBUG(llvm::dbgs() << "start collecting result pattern bound symbols\n");
+  LLVM_DEBUG(dbgs() << "start collecting result pattern bound symbols\n");
   for (int i = 0, e = getNumResultPatterns(); i < e; ++i) {
     auto pattern = getResultPattern(i);
     collectBoundSymbols(pattern, infoMap, /*isSrcPattern=*/false);
   }
-  LLVM_DEBUG(llvm::dbgs() << "done collecting result pattern bound symbols\n");
+  LLVM_DEBUG(dbgs() << "done collecting result pattern bound symbols\n");
 }
 
 const Operator &Pattern::getSourceRootOp() {
@@ -664,7 +668,7 @@ std::vector<AppliedConstraint> Pattern::getConstraints() const {
   ret.reserve(listInit->size());
 
   for (auto *it : *listInit) {
-    auto *dagInit = dyn_cast<llvm::DagInit>(it);
+    auto *dagInit = dyn_cast<DagInit>(it);
     if (!dagInit)
       PrintFatalError(&def, "all elements in Pattern multi-entity "
                             "constraints should be DAG nodes");
@@ -680,7 +684,7 @@ std::vector<AppliedConstraint> Pattern::getConstraints() const {
       entities.emplace_back(argName->getValue());
     }
 
-    ret.emplace_back(cast<llvm::DefInit>(dagInit->getOperator())->getDef(),
+    ret.emplace_back(cast<DefInit>(dagInit->getOperator())->getDef(),
                      dagInit->getNameStr(), std::move(entities));
   }
   return ret;
@@ -693,19 +697,19 @@ int Pattern::getNumSupplementalPatterns() const {
 
 DagNode Pattern::getSupplementalPattern(unsigned index) const {
   auto *results = def.getValueAsListInit("supplementalPatterns");
-  return DagNode(cast<llvm::DagInit>(results->getElement(index)));
+  return DagNode(cast<DagInit>(results->getElement(index)));
 }
 
 int Pattern::getBenefit() const {
   // The initial benefit value is a heuristic with number of ops in the source
   // pattern.
   int initBenefit = getSourcePattern().getNumOps();
-  const llvm::DagInit *delta = def.getValueAsDag("benefitDelta");
-  if (delta->getNumArgs() != 1 || !isa<llvm::IntInit>(delta->getArg(0))) {
+  const DagInit *delta = def.getValueAsDag("benefitDelta");
+  if (delta->getNumArgs() != 1 || !isa<IntInit>(delta->getArg(0))) {
     PrintFatalError(&def,
                     "The 'addBenefit' takes and only takes one integer value");
   }
-  return initBenefit + dyn_cast<llvm::IntInit>(delta->getArg(0))->getValue();
+  return initBenefit + dyn_cast<IntInit>(delta->getArg(0))->getValue();
 }
 
 std::vector<Pattern::IdentifierLine> Pattern::getLocation() const {
@@ -736,8 +740,8 @@ void Pattern::collectBoundSymbols(DagNode tree, SymbolInfoMap &infoMap,
   if (tree.isNativeCodeCall()) {
     if (!treeName.empty()) {
       if (!isSrcPattern) {
-        LLVM_DEBUG(llvm::dbgs() << "found symbol bound to NativeCodeCall: "
-                                << treeName << '\n');
+        LLVM_DEBUG(dbgs() << "found symbol bound to NativeCodeCall: "
+                          << treeName << '\n');
         verifyBind(
             infoMap.bindValues(treeName, tree.getNumReturnsOfNativeCode()),
             treeName);
@@ -820,8 +824,8 @@ void Pattern::collectBoundSymbols(DagNode tree, SymbolInfoMap &infoMap,
     // The name attached to the DAG node's operator is for representing the
     // results generated from this op. It should be remembered as bound results.
     if (!treeName.empty()) {
-      LLVM_DEBUG(llvm::dbgs()
-                 << "found symbol bound to op result: " << treeName << '\n');
+      LLVM_DEBUG(dbgs() << "found symbol bound to op result: " << treeName
+                        << '\n');
       verifyBind(infoMap.bindOpResult(treeName, op), treeName);
     }
 
@@ -896,8 +900,8 @@ void Pattern::collectBoundSymbols(DagNode tree, SymbolInfoMap &infoMap,
         auto treeArgName = tree.getArgName(i);
         // `$_` is a special symbol meaning ignore the current argument.
         if (!treeArgName.empty() && treeArgName != "_") {
-          LLVM_DEBUG(llvm::dbgs() << "found symbol bound to op argument: "
-                                  << treeArgName << '\n');
+          LLVM_DEBUG(dbgs() << "found symbol bound to op argument: "
+                            << treeArgName << '\n');
           verifyBind(infoMap.bindOpArgument(tree, treeArgName, op, opArgIdx),
                      treeArgName);
         }
diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp
index 0e38dab8491c..f71dd0bd35f8 100644
--- a/mlir/lib/TableGen/Predicate.cpp
+++ b/mlir/lib/TableGen/Predicate.cpp
@@ -20,15 +20,18 @@
 
 using namespace mlir;
 using namespace tblgen;
+using llvm::Init;
+using llvm::Record;
+using llvm::SpecificBumpPtrAllocator;
 
 // Construct a Predicate from a record.
-Pred::Pred(const llvm::Record *record) : def(record) {
+Pred::Pred(const Record *record) : def(record) {
   assert(def->isSubClassOf("Pred") &&
          "must be a subclass of TableGen 'Pred' class");
 }
 
 // Construct a Predicate from an initializer.
-Pred::Pred(const llvm::Init *init) {
+Pred::Pred(const Init *init) {
   if (const auto *defInit = dyn_cast_or_null<llvm::DefInit>(init))
     def = defInit->getDef();
 }
@@ -48,12 +51,12 @@ bool Pred::isCombined() const {
 
 ArrayRef<SMLoc> Pred::getLoc() const { return def->getLoc(); }
 
-CPred::CPred(const llvm::Record *record) : Pred(record) {
+CPred::CPred(const Record *record) : Pred(record) {
   assert(def->isSubClassOf("CPred") &&
          "must be a subclass of Tablegen 'CPred' class");
 }
 
-CPred::CPred(const llvm::Init *init) : Pred(init) {
+CPred::CPred(const Init *init) : Pred(init) {
   assert((!def || def->isSubClassOf("CPred")) &&
          "must be a subclass of Tablegen 'CPred' class");
 }
@@ -64,22 +67,22 @@ std::string CPred::getConditionImpl() const {
   return std::string(def->getValueAsString("predExpr"));
 }
 
-CombinedPred::CombinedPred(const llvm::Record *record) : Pred(record) {
+CombinedPred::CombinedPred(const Record *record) : Pred(record) {
   assert(def->isSubClassOf("CombinedPred") &&
          "must be a subclass of Tablegen 'CombinedPred' class");
 }
 
-CombinedPred::CombinedPred(const llvm::Init *init) : Pred(init) {
+CombinedPred::CombinedPred(const Init *init) : Pred(init) {
   assert((!def || def->isSubClassOf("CombinedPred")) &&
          "must be a subclass of Tablegen 'CombinedPred' class");
 }
 
-const llvm::Record *CombinedPred::getCombinerDef() const {
+const Record *CombinedPred::getCombinerDef() const {
   assert(def->getValue("kind") && "CombinedPred must have a value 'kind'");
   return def->getValueAsDef("kind");
 }
 
-std::vector<const llvm::Record *> CombinedPred::getChildren() const {
+std::vector<const Record *> CombinedPred::getChildren() const {
   assert(def->getValue("children") &&
          "CombinedPred must have a value 'children'");
   return def->getValueAsListOfDefs("children");
@@ -156,7 +159,7 @@ static void performSubstitutions(std::string &str,
 // All nodes are created within "allocator".
 static PredNode *
 buildPredicateTree(const Pred &root,
-                   llvm::SpecificBumpPtrAllocator<PredNode> &allocator,
+                   SpecificBumpPtrAllocator<PredNode> &allocator,
                    ArrayRef<Subst> substitutions) {
   auto *rootNode = allocator.Allocate();
   new (rootNode) PredNode;
@@ -351,7 +354,7 @@ static std::string getCombinedCondition(const PredNode &root) {
 }
 
 std::string CombinedPred::getConditionImpl() const {
-  llvm::SpecificBumpPtrAllocator<PredNode> allocator;
+  SpecificBumpPtrAllocator<PredNode> allocator;
   auto *predicateTree = buildPredicateTree(*this, allocator, {});
   predicateTree =
       propagateGroundTruth(predicateTree,
diff --git a/mlir/lib/TableGen/Type.cpp b/mlir/lib/TableGen/Type.cpp
index c3b813ec598d..4f74056947ab 100644
--- a/mlir/lib/TableGen/Type.cpp
+++ b/mlir/lib/TableGen/Type.cpp
@@ -18,6 +18,7 @@
 
 using namespace mlir;
 using namespace mlir::tblgen;
+using llvm::Record;
 
 TypeConstraint::TypeConstraint(const llvm::DefInit *init)
     : TypeConstraint(init->getDef()) {}
@@ -42,7 +43,7 @@ StringRef TypeConstraint::getVariadicOfVariadicSegmentSizeAttr() const {
 // Returns the builder call for this constraint if this is a buildable type,
 // returns std::nullopt otherwise.
 std::optional<StringRef> TypeConstraint::getBuilderCall() const {
-  const llvm::Record *baseType = def;
+  const Record *baseType = def;
   if (isVariableLength())
     baseType = baseType->getValueAsDef("baseType");
 
@@ -64,7 +65,7 @@ StringRef TypeConstraint::getCppType() const {
   return def->getValueAsString("cppType");
 }
 
-Type::Type(const llvm::Record *record) : TypeConstraint(record) {}
+Type::Type(const Record *record) : TypeConstraint(record) {}
 
 Dialect Type::getDialect() const {
   return Dialect(def->getValueAsDef("dialect"));
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index 1baddd881f6a..b8d0329906a8 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -1045,9 +1045,8 @@ UnresolvedMaterializationRewrite::UnresolvedMaterializationRewrite(
     const TypeConverter *converter, MaterializationKind kind, Type originalType)
     : OperationRewrite(Kind::UnresolvedMaterialization, rewriterImpl, op),
       converterAndKind(converter, kind), originalType(originalType) {
-  assert(!originalType ||
-         kind == MaterializationKind::Target &&
-             "original type is valid only for target materializations");
+  assert((!originalType || kind == MaterializationKind::Target) &&
+         "original type is valid only for target materializations");
   rewriterImpl.unresolvedMaterializations[op] = this;
 }
 
@@ -1337,9 +1336,8 @@ Value ConversionPatternRewriterImpl::buildUnresolvedMaterialization(
     MaterializationKind kind, OpBuilder::InsertPoint ip, Location loc,
     ValueRange inputs, Type outputType, Type originalType,
     const TypeConverter *converter) {
-  assert(!originalType ||
-         kind == MaterializationKind::Target &&
-             "original type is valid only for target materializations");
+  assert((!originalType || kind == MaterializationKind::Target) &&
+         "original type is valid only for target materializations");
 
   // Avoid materializing an unnecessary cast.
   if (inputs.size() == 1 && inputs.front().getType() == outputType)
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 241b8a486c01..4e1035e038ca 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -308,6 +308,134 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL:  func.func @no_hoisting_unknown_bound_loop
+func.func @no_hoisting_unknown_bound_loop(%memref0: memref<20xi32>, %lb: index, %ub: index) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // %lb and %ub are unbounded, so do not hoist.
+  // CHECK:       scf.for {{.*}} {
+  // CHECK-NEXT:    vector.transfer_read
+  // CHECK-NEXT:    "test.some_use"
+  scf.for %arg2 = %lb to %ub step %c1 {
+    %read = vector.transfer_read %memref0[%c0], %c0_i32 {in_bounds = [true]} : memref<20xi32>, vector<4xi32>
+    "test.some_use"(%read) : (vector<4xi32>) ->()
+  }
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["func.func"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
+      : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL:  func.func @no_hoisting_possibly_zero_trip_loop
+func.func @no_hoisting_possibly_zero_trip_loop(%memref0: memref<20xi32>, %lb: index, %ub: index) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // %lb_0 is in range [%lb, 8], and %ub_0 is in range [4, %ub].
+  // Since %lb_0 could be greater than %ub_0, do not hoist.
+  %lb_0 = affine.min affine_map<(d0) -> (d0, 8)>(%lb)
+  %ub_0 = affine.max affine_map<(d0) -> (d0, 4)>(%ub)
+
+  // CHECK:       scf.for {{.*}} {
+  // CHECK-NEXT:    vector.transfer_read
+  // CHECK-NEXT:    "test.some_use"
+  scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
+    %read = vector.transfer_read %memref0[%c0], %c0_i32 {in_bounds = [true]} : memref<20xi32>, vector<4xi32>
+    "test.some_use"(%read) : (vector<4xi32>) ->()
+  }
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["func.func"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
+      : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL:  func.func @no_hoisting_possibly_zero_trip_loop_eq_lb_and_ub
+func.func @no_hoisting_possibly_zero_trip_loop_eq_lb_and_ub(%memref0: memref<20xi32>, %lb: index, %ub: index) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // %lb_0 is in range [%lb, 8], and %ub_0 is in range [8, %ub].
+  // Since %lb_0 could be equal to %ub_0, do not hoist.
+  %lb_0 = affine.min affine_map<(d0) -> (d0, 8)>(%lb)
+  %ub_0 = affine.max affine_map<(d0) -> (d0, 8)>(%ub)
+
+  // CHECK:       scf.for {{.*}} {
+  // CHECK-NEXT:    vector.transfer_read
+  // CHECK-NEXT:    "test.some_use"
+  scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
+    %read = vector.transfer_read %memref0[%c0], %c0_i32 {in_bounds = [true]} : memref<20xi32>, vector<4xi32>
+    "test.some_use"(%read) : (vector<4xi32>) ->()
+  }
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["func.func"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
+      : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL:  func.func @hoisting_non_zero_trip_loop
+func.func @hoisting_non_zero_trip_loop(%memref0: memref<20xi32>, %lb: index, %ub: index) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // %lb_0 is in range [%lb, 4], and %ub_0 is in range [8, %ub].
+  // Since %lb_0 is guaranteed to be less than %ub_0, hoisting is possible.
+  %lb_0 = affine.min affine_map<(d0) -> (d0, 4)>(%lb)
+  %ub_0 = affine.max affine_map<(d0) -> (d0, 8)>(%ub)
+
+  // CHECK:       vector.transfer_read
+  // CHECK:       scf.for {{.*}} {
+  // CHECK-NEXT:    "test.some_use"
+  scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
+    %read = vector.transfer_read %memref0[%c0], %c0_i32 {in_bounds = [true]} : memref<20xi32>, vector<4xi32>
+    "test.some_use"(%read) : (vector<4xi32>) ->()
+  }
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["func.func"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
+      : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // Regression test - `vector.transfer_read` below should not be hoisted.
 // Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
 // (read by `vector.transfer_read`) alias.
@@ -366,7 +494,7 @@ func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
     %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
     vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
     %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
-    "prevent.dce"(%read) : (vector<12xi32>) ->()
+    "test.some_use"(%read) : (vector<12xi32>) ->()
   }
   return
 }
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index e2169fe1404c..dc4306b8316a 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -213,6 +213,21 @@ func.func @rank_reducing_parallel_insert_slice(%in: tensor<100xf32>, %out: tenso
 
 // -----
 
+// CHECK-LABEL: func.func @parallel_insert_full_slice_in_place
+// CHECK-NOT:     memref.alloc()
+func.func @parallel_insert_full_slice_in_place(%2: tensor<2xf32>) -> tensor<2xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %3 = scf.forall (%arg0) in (1) shared_outs(%arg2 = %2) -> (tensor<2xf32>) {
+    %fill = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<2xf32>) -> tensor<2xf32>
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %fill into %arg2[0] [2] [1] : tensor<2xf32> into tensor<2xf32>
+    }
+  } {mapping = [#gpu.thread<linear_dim_0>]}
+  return %3 : tensor<2xf32>
+}
+
+// -----
+
 // This test case could bufferize in-place with a better analysis. However, it
 // is simpler to let the canonicalizer fold away the tensor.insert_slice.
 
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index 0342f294f38d..baf5d5c650e3 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -54,7 +54,7 @@ TEST(Bytecode, MultiModuleWithResource) {
   constexpr size_t kAlignment = 0x20;
   size_t bufferSize = buffer.size();
   buffer.reserve(bufferSize + kAlignment - 1);
-  size_t pad = ~(uintptr_t)buffer.data() + 1 & kAlignment - 1;
+  size_t pad = (~(uintptr_t)buffer.data() + 1) & (kAlignment - 1);
   buffer.insert(0, pad, ' ');
   StringRef alignedBuffer(buffer.data() + pad, bufferSize);
 
diff --git a/mlir/unittests/Support/CyclicReplacerCacheTest.cpp b/mlir/unittests/Support/CyclicReplacerCacheTest.cpp
index 64a8ab72b69b..26f0709f7d83 100644
--- a/mlir/unittests/Support/CyclicReplacerCacheTest.cpp
+++ b/mlir/unittests/Support/CyclicReplacerCacheTest.cpp
@@ -225,7 +225,8 @@ public:
     /// Add a recursive-self-node, i.e. a duplicate of the original node that is
     /// meant to represent an indirection to it.
     std::pair<Node, int64_t> addRecursiveSelfNode(Graph::Node originalId) {
-      return {addNode(originalId, nextRecursionId), nextRecursionId++};
+      auto node = addNode(originalId, nextRecursionId);
+      return {node, nextRecursionId++};
     }
     void addEdge(Node src, Node sink) { connections.addEdge(src, sink); }
 
diff --git a/openmp/runtime/doc/doxygen/config b/openmp/runtime/doc/doxygen/config
index 8d79dc143cc1..04c966766ba6 100644
--- a/openmp/runtime/doc/doxygen/config
+++ b/openmp/runtime/doc/doxygen/config
@@ -1,1822 +1,1822 @@
-# Doxyfile 1.o8.2
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a hash (#) is considered a comment and will be ignored.
-# The format is:
-#       TAG = value [value, ...]
-# For lists items can also be appended using:
-#       TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the
-# iconv built into libc) for the transcoding. See
-# http://www.gnu.org/software/libiconv for the list of possible encodings.
-
-DOXYFILE_ENCODING      = UTF-8
-
-# The PROJECT_NAME tag is a single word (or sequence of words) that should
-# identify the project. Note that if you do not use Doxywizard you need
-# to put quotes around the project name if it contains spaces.
-
-PROJECT_NAME           = "LLVM OpenMP* Runtime Library"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number.
-# This could be handy for archiving the generated documentation or
-# if some version control system is used.
-
-PROJECT_NUMBER         =
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give viewer
-# a quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF          =
-
-# With the PROJECT_LOGO tag one can specify an logo or icon that is
-# included in the documentation. The maximum height of the logo should not
-# exceed 55 pixels and the maximum width should not exceed 200 pixels.
-# Doxygen will copy the logo to the output directory.
-
-PROJECT_LOGO           =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
-# base path where the generated documentation will be put.
-# If a relative path is entered, it will be relative to the location
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY       = doc/doxygen/generated
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
-# 4096 sub-directories (in 2 levels) under the output directory of each output
-# format and will distribute the generated files over these directories.
-# Enabling this option can be useful when feeding doxygen a huge amount of
-# source files, where putting all generated files in the same directory would
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS         = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# The default language is English, other supported languages are:
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
-# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
-# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
-# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
-# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
-
-OUTPUT_LANGUAGE        = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
-# include brief member descriptions after the members that are listed in
-# the file and class documentation (similar to JavaDoc).
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC      = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
-# the brief description of a member or function before the detailed description.
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF           = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator
-# that is used to form the text in various listings. Each string
-# in this list, if found as the leading text of the brief description, will be
-# stripped from the text and the result after processing the whole list, is
-# used as the annotated text. Otherwise, the brief description is used as-is.
-# If left blank, the following values are used ("$name" is automatically
-# replaced with the name of the entity): "The $name class" "The $name widget"
-# "The $name file" "is" "provides" "specifies" "contains"
-# "represents" "a" "an" "the"
-
-ABBREVIATE_BRIEF       =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# Doxygen will generate a detailed section even if there is only a brief
-# description.
-
-ALWAYS_DETAILED_SEC    = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-
-INLINE_INHERITED_MEMB  = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
-# path before files name in the file list and in the header files. If set
-# to NO the shortest path that makes the file name unique will be used.
-
-FULL_PATH_NAMES        = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
-# can be used to strip a user-defined part of the path. Stripping is
-# only done if one of the specified strings matches the left-hand part of
-# the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the
-# path to strip. Note that you specify absolute paths here, but also
-# relative paths, which will be relative from the directory where doxygen is
-# started.
-
-STRIP_FROM_PATH        =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
-# the path mentioned in the documentation of a class, which tells
-# the reader which header file to include in order to use a class.
-# If left blank only the name of the header file containing the class
-# definition is used. Otherwise one should specify the include paths that
-# are normally passed to the compiler using the -I flag.
-
-STRIP_FROM_INC_PATH    =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
-# (but less readable) file names. This can be useful if your file system
-# doesn't support long names like on DOS, Mac, or CD-ROM.
-
-SHORT_NAMES            = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
-# will interpret the first line (until the first dot) of a JavaDoc-style
-# comment as the brief description. If set to NO, the JavaDoc
-# comments will behave just like regular Qt-style comments
-# (thus requiring an explicit @brief command for a brief description.)
-
-JAVADOC_AUTOBRIEF      = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
-# interpret the first line (until the first dot) of a Qt-style
-# comment as the brief description. If set to NO, the comments
-# will behave just like regular Qt-style comments (thus requiring
-# an explicit \brief command for a brief description.)
-
-QT_AUTOBRIEF           = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
-# treat a multi-line C++ special comment block (i.e. a block of //! or ///
-# comments) as a brief description. This used to be the default behaviour.
-# The new default is to treat a multi-line C++ comment block as a detailed
-# description. Set this tag to YES if you prefer the old behaviour instead.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
-# member inherits the documentation from any documented member that it
-# re-implements.
-
-INHERIT_DOCS           = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
-# a new page for each member. If set to NO, the documentation of a member will
-# be part of the file/class/namespace that contains it.
-
-SEPARATE_MEMBER_PAGES  = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab.
-# Doxygen uses this value to replace tabs by spaces in code fragments.
-
-TAB_SIZE               = 8
-
-# This tag can be used to specify a number of aliases that acts
-# as commands in the documentation. An alias has the form "name=value".
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to
-# put the command \sideeffect (or @sideeffect) in the documentation, which
-# will result in a user-defined paragraph with heading "Side Effects:".
-# You can put \n's in the value part of an alias to insert newlines.
-
-ALIASES                = "other=<sup>*</sup>"
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding
-# "class=itcl::class" will allow you to use the command class in the
-# itcl::class meaning.
-
-TCL_SUBST              =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
-# sources only. Doxygen will then generate output that is more tailored for C.
-# For instance, some of the names that are used will be different. The list
-# of all members will be omitted, etc.
-
-OPTIMIZE_OUTPUT_FOR_C  = NO
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for
-# Java. For instance, namespaces will be presented as packages, qualified
-# scopes will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA   = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources only. Doxygen will then generate output that is more tailored for
-# Fortran.
-
-OPTIMIZE_FOR_FORTRAN   = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for
-# VHDL.
-
-OPTIMIZE_OUTPUT_VHDL   = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension,
-# and language is one of the parsers supported by doxygen: IDL, Java,
-# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C,
-# C++. For instance to make doxygen treat .inc files as Fortran files (default
-# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
-# that for custom extensions you also need to set FILE_PATTERNS otherwise the
-# files are not read by doxygen.
-
-EXTENSION_MAPPING      =
-
-# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
-# comments according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you
-# can mix doxygen, HTML, and XML commands with Markdown formatting.
-# Disable only in case of backward compatibilities issues.
-
-MARKDOWN_SUPPORT       = YES
-
-# When enabled doxygen tries to link words that correspond to documented classes,
-# or namespaces to their corresponding documentation. Such a link can be
-# prevented in individual cases by by putting a % sign in front of the word or
-# globally by setting AUTOLINK_SUPPORT to NO.
-
-AUTOLINK_SUPPORT       = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
-# func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT    = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT        = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT            = NO
-
-# For Microsoft's IDL there are propget and propput attributes to
-# indicate getter and setter methods for a property. Setting this
-# option to YES (the default) will make doxygen replace the get and
-# set methods by a property in the documentation. This will only work
-# if the methods are indeed getting or setting a simple type. If this
-# is not the case, or you want to show the methods anyway, you should
-# set this option to NO.
-
-IDL_PROPERTY_SUPPORT   = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC   = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING            = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
-# unions are shown inside the group in which they are included (e.g. using
-# @ingroup) instead of on a separate page (for HTML and Man pages) or
-# section (for LaTeX and RTF).
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
-# unions with only public data fields will be shown inline in the documentation
-# of the scope in which they are defined (i.e. file, namespace, or group
-# documentation), provided this scope is documented. If set to NO (the default),
-# structs, classes, and unions are shown on a separate page (for HTML and Man
-# pages) or section (for LaTeX and RTF).
-
-INLINE_SIMPLE_STRUCTS  = NO
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
-# is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically
-# be useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-
-TYPEDEF_HIDES_STRUCT   = NO
-
-# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
-# determine which symbols to keep in memory and which to flush to disk.
-# When the cache is full, less often used symbols will be written to disk.
-# For small to medium size projects (<1000 input files) the default value is
-# probably good enough. For larger projects a too small cache size can cause
-# doxygen to be busy swapping symbols to and from disk most of the time
-# causing a significant performance penalty.
-# If the system has enough physical memory increasing the cache will improve the
-# performance by keeping more symbols in memory. Note that the value works on
-# a logarithmic scale so increasing the size by one will roughly double the
-# memory usage. The cache size is given by this formula:
-# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
-# corresponding to a cache size of 2^16 = 65536 symbols.
-
-SYMBOL_CACHE_SIZE      = 0
-
-# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
-# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
-# their name and scope. Since this can be an expensive process and often the
-# same symbol appear multiple times in the code, doxygen keeps a cache of
-# pre-resolved symbols. If the cache is too small doxygen will become slower.
-# If the cache is too large, memory is wasted. The cache size is given by this
-# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
-# corresponding to a cache size of 2^16 = 65536 symbols.
-
-LOOKUP_CACHE_SIZE      = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-
-EXTRACT_ALL            = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE        = YES
-
-# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
-# scope will be included in the documentation.
-
-EXTRACT_PACKAGE        = NO
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC         = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES  = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS  = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base
-# name of the file that contains the anonymous namespace. By default
-# anonymous namespaces are hidden.
-
-EXTRACT_ANON_NSPACES   = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_MEMBERS     = YES
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy.
-# If set to NO (the default) these classes will be included in the various
-# overviews. This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_CLASSES     = YES
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
-# friend (class|struct|union) declarations.
-# If set to NO (the default) these declarations will be included in the
-# documentation.
-
-HIDE_FRIEND_COMPOUNDS  = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
-# documentation blocks found inside the body of a function.
-# If set to NO (the default) these blocks will be appended to the
-# function's detailed documentation block.
-
-HIDE_IN_BODY_DOCS      = NO
-
-# The INTERNAL_DOCS tag determines if documentation
-# that is typed after a \internal command is included. If the tag is set
-# to NO (the default) then the documentation will be excluded.
-# Set it to YES to include the internal documentation.
-
-INTERNAL_DOCS          = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
-# file names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-
-CASE_SENSE_NAMES       = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
-# will show members with their full class and namespace scopes in the
-# documentation. If set to YES the scope will be hidden.
-
-HIDE_SCOPE_NAMES       = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
-# will put a list of the files that are included by a file in the documentation
-# of that file.
-
-SHOW_INCLUDE_FILES     = YES
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
-# will list include files with double quotes in the documentation
-# rather than with sharp brackets.
-
-FORCE_LOCAL_INCLUDES   = NO
-
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
-# is inserted in the documentation for inline members.
-
-INLINE_INFO            = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
-# will sort the (detailed) documentation of file and class members
-# alphabetically by member name. If set to NO the members will appear in
-# declaration order.
-
-SORT_MEMBER_DOCS       = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
-# brief documentation of file, namespace and class members alphabetically
-# by member name. If set to NO (the default) the members will appear in
-# declaration order.
-
-SORT_BRIEF_DOCS        = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
-# will sort the (brief and detailed) documentation of class members so that
-# constructors and destructors are listed first. If set to NO (the default)
-# the constructors will appear in the respective orders defined by
-# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
-# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
-# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
-# hierarchy of group names into alphabetical order. If set to NO (the default)
-# the group names will appear in their defined order.
-
-SORT_GROUP_NAMES       = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME     = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
-# do proper type resolution of all parameters of a function it will reject a
-# match between the prototype and the implementation of a member function even
-# if there is only one candidate or it is obvious which candidate to choose
-# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
-# will still accept a match between prototype and implementation in such cases.
-
-STRICT_PROTO_MATCHING  = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST      = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST      = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST       = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS       =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or macro consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and macros in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES  = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES        = YES
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
-# This will remove the Files entry from the Quick Index and from the
-# Folder Tree View (if specified). The default is YES.
-
-# We probably will want this, but we have no file documentation yet so it's simpler to remove
-# it for now.
-SHOW_FILES             = NO
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page.
-# This will remove the Namespaces entry from the Quick Index
-# and from the Folder Tree View (if specified). The default is YES.
-
-SHOW_NAMESPACES        = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
-
-FILE_VERSION_FILTER    =
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option.
-# You can optionally specify a file name after the option, if omitted
-# DoxygenLayout.xml will be used as the name of the layout file.
-
-LAYOUT_FILE            =
-
-# The CITE_BIB_FILES tag can be used to specify one or more bib files
-# containing the references data. This must be a list of .bib files. The
-# .bib extension is automatically appended if omitted. Using this command
-# requires the bibtex tool to be installed. See also
-# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
-# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
-# feature you need bibtex and perl available in the search path.
-
-CITE_BIB_FILES         =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET                  = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS               = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED   = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR      = YES
-
-# The WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC       = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER)
-
-WARN_FORMAT            =
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE           =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT                  = src  doc/doxygen/libomp_interface.h
-# The ittnotify code also has doxygen documentation, but if we include it here
-# it takes over from us!
-# src/thirdparty/ittnotify
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
-# also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
-# the list of possible encodings.
-
-INPUT_ENCODING         = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
-# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
-# *.f90 *.f *.for *.vhd *.vhdl
-
-FILE_PATTERNS          = *.c *.h *.cpp
-# We may also want to include the asm files with appropriate ifdef to ensure
-# doxygen doesn't see the content, just the documentation...
-
-# The RECURSIVE tag can be used to turn specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-# Only look in the one directory.
-RECURSIVE              = NO
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-# Note that relative paths are relative to the directory from which doxygen is
-# run.
-
-EXCLUDE                = src/test-touch.c
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix file system feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS       = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS       =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS        =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH           =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS       =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE      = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain image that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH             =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output.
-# If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER           =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis.
-# Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match.
-# The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
-# info on how filters are used. If FILTER_PATTERNS is empty or if
-# non of the patterns match the file name, INPUT_FILTER is applied.
-
-FILTER_PATTERNS        =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will be used to filter the input files when producing source
-# files to browse (i.e. when SOURCE_BROWSER is set to YES).
-
-FILTER_SOURCE_FILES    = NO
-
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
-# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
-# and it is also possible to disable source filtering for a specific pattern
-# using *.ext= (so without naming a filter). This option only has effect when
-# FILTER_SOURCE_FILES is enabled.
-
-FILTER_SOURCE_PATTERNS =
-
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will
-# be generated. Documented entities will be cross-referenced with these sources.
-# Note: To get rid of all source code in the generated output, make sure also
-# VERBATIM_HEADERS is set to NO.
-
-SOURCE_BROWSER         = YES
-
-# Setting the INLINE_SOURCES tag to YES will include the body
-# of functions and classes directly in the documentation.
-
-INLINE_SOURCES         = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
-# doxygen to hide any special comment blocks from generated source code
-# fragments. Normal C, C++ and Fortran comments will always remain visible.
-
-STRIP_CODE_COMMENTS    = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES
-# then for each documented function all documented
-# functions referencing it will be listed.
-
-REFERENCED_BY_RELATION = YES
-
-# If the REFERENCES_RELATION tag is set to YES
-# then for each documented function all documented entities
-# called/used by that function will be listed.
-
-REFERENCES_RELATION    = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code.
-# Otherwise they will link to the documentation.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code
-# will point to the HTML generated by the htags(1) tool instead of doxygen
-# built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
-# will need version 4.8.6 or higher.
-
-USE_HTAGS              = NO
-
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
-# will generate a verbatim copy of the header file for each class for
-# which an include is specified. Set to NO to disable this.
-
-VERBATIM_HEADERS       = YES
-
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
-# of all compounds will be generated. Enable this if the project
-# contains a lot of classes, structs, unions or interfaces.
-
-ALPHABETICAL_INDEX     = YES
-
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
-# in which this list will be split (can be a number in the range [1..20])
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all
-# classes will be put under the same header in the alphabetical index.
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
-# should be ignored while generating the index headers.
-
-IGNORE_PREFIX          =
-
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
-# generate HTML output.
-
-GENERATE_HTML          = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `html' will be used as the default path.
-
-HTML_OUTPUT            =
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
-# doxygen will generate files with .html extension.
-
-HTML_FILE_EXTENSION    = .html
-
-# The HTML_HEADER tag can be used to specify a personal HTML header for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard header. Note that when using a custom header you are responsible
-#  for the proper inclusion of any scripts and style sheets that doxygen
-# needs, which is dependent on the configuration options used.
-# It is advised to generate a default header using "doxygen -w html
-# header.html footer.html stylesheet.css YourConfigFile" and then modify
-# that header. Note that the header is subject to change so you typically
-# have to redo this when upgrading to a newer version of doxygen or when
-# changing the value of configuration settings such as GENERATE_TREEVIEW!
-
-HTML_HEADER            =
-
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard footer.
-
-HTML_FOOTER            =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
-# style sheet that is used by each HTML page. It can be used to
-# fine-tune the look of the HTML output. If left blank doxygen will
-# generate a default style sheet. Note that it is recommended to use
-# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
-# tag will in the future become obsolete.
-
-HTML_STYLESHEET        =
-
-# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
-# user-defined cascading style sheet that is included after the standard
-# style sheets created by doxygen. Using this option one can overrule
-# certain style aspects. This is preferred over using HTML_STYLESHEET
-# since it does not replace the standard style sheet and is therefor more
-# robust against future updates. Doxygen will copy the style sheet file to
-# the output directory.
-
-HTML_EXTRA_STYLESHEET  =
-
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the HTML output directory. Note
-# that these files will be copied to the base HTML output directory. Use the
-# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that
-# the files will be copied as-is; there are no commands or markers available.
-
-HTML_EXTRA_FILES       =
-
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
-# Doxygen will adjust the colors in the style sheet and background images
-# according to this color. Hue is specified as an angle on a colorwheel,
-# see http://en.wikipedia.org/wiki/Hue for more information.
-# For instance the value 0 represents red, 60 is yellow, 120 is green,
-# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
-# The allowed range is 0 to 359.
-
-HTML_COLORSTYLE_HUE    = 220
-
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
-# the colors in the HTML output. For a value of 0 the output will use
-# grayscales only. A value of 255 will produce the most vivid colors.
-
-HTML_COLORSTYLE_SAT    = 100
-
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
-# the luminance component of the colors in the HTML output. Values below
-# 100 gradually make the output lighter, whereas values above 100 make
-# the output darker. The value divided by 100 is the actual gamma applied,
-# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2,
-# and 100 does not change the gamma.
-
-HTML_COLORSTYLE_GAMMA  = 80
-
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting
-# this to NO can help when comparing the output of multiple runs.
-
-HTML_TIMESTAMP         = NO
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded.
-
-HTML_DYNAMIC_SECTIONS  = NO
-
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
-# entries shown in the various tree structured indices initially; the user
-# can expand and collapse entries dynamically later on. Doxygen will expand
-# the tree to such a level that at most the specified number of entries are
-# visible (unless a fully collapsed tree already exceeds this amount).
-# So setting the number of entries 1 will produce a full collapsed tree by
-# default. 0 is a special value representing an infinite number of entries
-# and will result in a full expanded tree by default.
-
-HTML_INDEX_NUM_ENTRIES = 100
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files
-# will be generated that can be used as input for Apple's Xcode 3
-# integrated development environment, introduced with OSX 10.5 (Leopard).
-# To create a documentation set, doxygen will generate a Makefile in the
-# HTML output directory. Running make will produce the docset in that
-# directory and running "make install" will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
-# it at startup.
-# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
-
-GENERATE_DOCSET        = NO
-
-# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
-# feed. A documentation feed provides an umbrella under which multiple
-# documentation sets from a single provider (such as a company or product suite)
-# can be grouped.
-
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-
-# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
-# should uniquely identify the documentation set bundle. This should be a
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
-# will append .docset to the name.
-
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-
-# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely
-# identify the documentation publisher. This should be a reverse domain-name
-# style string, e.g. com.mycompany.MyDocSet.documentation.
-
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-
-# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher.
-
-DOCSET_PUBLISHER_NAME  = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP      = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE               =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION           =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls if a separate .chi index file is generated (YES) or that
-# it should be included in the main .chm file (NO).
-
-GENERATE_CHI           = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file
-# content.
-
-CHM_INDEX_ENCODING     =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC             = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND             = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
-# that can be used as input for Qt's qhelpgenerator to generate a
-# Qt Compressed Help (.qch) of the generated HTML documentation.
-
-GENERATE_QHP           = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
-# be used to specify the file name of the resulting .qch file.
-# The path specified is relative to the HTML output folder.
-
-QCH_FILE               =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating
-# Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#namespace
-
-QHP_NAMESPACE          = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
-# Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#virtual-folders
-
-QHP_VIRTUAL_FOLDER     = doc
-
-# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
-# add. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#custom-filters
-
-QHP_CUST_FILTER_NAME   =
-
-# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see
-# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
-# Qt Help Project / Custom Filters</a>.
-
-QHP_CUST_FILTER_ATTRS  =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's
-# filter section matches.
-# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
-# Qt Help Project / Filter Attributes</a>.
-
-QHP_SECT_FILTER_ATTRS  =
-
-# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
-# be used to specify the location of Qt's qhelpgenerator.
-# If non-empty doxygen will try to run qhelpgenerator on the generated
-# .qhp file.
-
-QHG_LOCATION           =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
-#  will be generated, which together with the HTML files, form an Eclipse help
-# plugin. To install this plugin and make it available under the help contents
-# menu in Eclipse, the contents of the directory containing the HTML and XML
-# files needs to be copied into the plugins directory of eclipse. The name of
-# the directory within the plugins directory should be the same as
-# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
-# the help appears.
-
-GENERATE_ECLIPSEHELP   = NO
-
-# A unique identifier for the eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have
-# this name.
-
-ECLIPSE_DOC_ID         = org.doxygen.Project
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
-# at top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it. Since the tabs have the same information as the
-# navigation tree you can set this option to NO if you already set
-# GENERATE_TREEVIEW to YES.
-
-DISABLE_INDEX          = NO
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information.
-# If the tag value is set to YES, a side panel will be generated
-# containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
-# Windows users are probably better off using the HTML help feature.
-# Since the tree basically has the same information as the tab index you
-# could consider to set DISABLE_INDEX to NO when enabling this option.
-
-GENERATE_TREEVIEW      = NO
-
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
-# (range [0,1..20]) that doxygen will group on one line in the generated HTML
-# documentation. Note that a value of 0 will completely suppress the enum
-# values from appearing in the overview section.
-
-ENUM_VALUES_PER_LINE   = 4
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH         = 250
-
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
-# links to external symbols imported via tag files in a separate window.
-
-EXT_LINKS_IN_WINDOW    = NO
-
-# Use this tag to change the font size of Latex formulas included
-# as images in the HTML documentation. The default is 10. Note that
-# when you change the font size after a successful doxygen run you need
-# to manually remove any form_*.png images from the HTML output directory
-# to force them to be regenerated.
-
-FORMULA_FONTSIZE       = 10
-
-# Use the FORMULA_TRANPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are
-# not supported properly for IE 6.0, but are supported on all modern browsers.
-# Note that when changing this option you need to delete any form_*.png files
-# in the HTML output before the changes have effect.
-
-FORMULA_TRANSPARENT    = YES
-
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
-# (see http://www.mathjax.org) which uses client side Javascript for the
-# rendering instead of using prerendered bitmaps. Use this if you do not
-# have LaTeX installed or if you want to formulas look prettier in the HTML
-# output. When enabled you may also need to install MathJax separately and
-# configure the path to it using the MATHJAX_RELPATH option.
-
-USE_MATHJAX            = NO
-
-# When MathJax is enabled you need to specify the location relative to the
-# HTML output directory using the MATHJAX_RELPATH option. The destination
-# directory should contain the MathJax.js script. For instance, if the mathjax
-# directory is located at the same level as the HTML output directory, then
-# MATHJAX_RELPATH should be ../mathjax. The default value points to
-# the MathJax Content Delivery Network so you can quickly see the result without
-# installing MathJax.
-# However, it is strongly recommended to install a local
-# copy of MathJax from http://www.mathjax.org before deployment.
-
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-
-# The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension
-# names that should be enabled during MathJax rendering.
-
-MATHJAX_EXTENSIONS     =
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box
-# for the HTML output. The underlying search engine uses javascript
-# and DHTML and should work on any modern browser. Note that when using
-# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
-# (GENERATE_DOCSET) there is already a search function so this one should
-# typically be disabled. For large projects the javascript based search engine
-# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
-
-SEARCHENGINE           = YES
-
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a PHP enabled web server instead of at the web client
-# using Javascript. Doxygen will generate the search PHP script and index
-# file to put on the web server. The advantage of the server
-# based approach is that it scales better to large projects and allows
-# full text search. The disadvantages are that it is more difficult to setup
-# and does not have live searching capabilities.
-
-SERVER_BASED_SEARCH    = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX         = YES
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT           =
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-# Note that when enabling USE_PDFLATEX this option is only used for
-# generating bitmaps for formulas in the HTML output, but not in the
-# Makefile that is written to the output directory.
-
-LATEX_CMD_NAME         = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME     = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX          = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE             = a4wide
-
-# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES         =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER           = doc/doxygen/header.tex
-
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
-# the generated latex document. The footer should contain everything after
-# the last chapter. If it is left blank doxygen will generate a
-# standard footer. Notice: only use this tag if you know what you are doing!
-
-LATEX_FOOTER           =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS         = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX           = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE        = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES     = NO
-
-# If LATEX_SOURCE_CODE is set to YES then doxygen will include
-# source code with syntax highlighting in the LaTeX output.
-# Note that which sources are shown also depends on other settings
-# such as SOURCE_BROWSER.
-
-LATEX_SOURCE_CODE      = NO
-
-# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
-# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
-# http://en.wikipedia.org/wiki/BibTeX for more info.
-
-LATEX_BIB_STYLE        = plain
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF           = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT             =
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF            = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS         = NO
-
-# Load style sheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments. You only have to provide
-# replacements, missing definitions are set to their default value.
-
-RTF_STYLESHEET_FILE    =
-
-# Set optional variables used in the generation of an rtf document.
-# Syntax is similar to doxygen's config file.
-
-RTF_EXTENSIONS_FILE    =
-
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
-# generate man pages
-
-GENERATE_MAN           = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `man' will be used as the default path.
-
-MAN_OUTPUT             =
-
-# The MAN_EXTENSION tag determines the extension that is added to
-# the generated man pages (default is the subroutine's section .3)
-
-MAN_EXTENSION          =
-
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
-# then it will generate one additional man file for each entity
-# documented in the real man page(s). These additional files
-# only source the real man page, but without them the man command
-# would be unable to find the correct page. The default is NO.
-
-MAN_LINKS              = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES Doxygen will
-# generate an XML file that captures the structure of
-# the code including all documentation.
-
-GENERATE_XML           = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `xml' will be used as the default path.
-
-XML_OUTPUT             = xml
-
-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA             =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD                =
-
-# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
-# dump the program listings (including syntax highlighting
-# and cross-referencing information) to the XML output. Note that
-# enabling this will significantly increase the size of the XML output.
-
-XML_PROGRAMLISTING     = YES
-
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
-# generate an AutoGen Definitions (see autogen.sf.net) file
-# that captures the structure of the code including all
-# documentation. Note that this feature is still experimental
-# and incomplete at the moment.
-
-GENERATE_AUTOGEN_DEF   = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will
-# generate a Perl module file that captures the structure of
-# the code including all documentation. Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD       = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX          = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader.
-# This is useful
-# if you want to understand what is going on.
-# On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY         = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING   = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION        = YES
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF     = YES
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
-# pointed to by INCLUDE_PATH will be searched when a #include is found.
-
-SEARCH_INCLUDES        = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH           =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS  =
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
-
-PREDEFINED             = OMP_30_ENABLED=1, OMP_40_ENABLED=1, KMP_STATS_ENABLED=1
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition that
-# overrules the definition found in the source code.
-
-EXPAND_AS_DEFINED      =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all references to function-like macros
-# that are alone on a line, have an all uppercase name, and do not end with a
-# semicolon, because these will confuse the parser if not removed.
-
-SKIP_FUNCTION_MACROS   = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles. For each
-# tag file the location of the external documentation should be added. The
-# format of a tag file without this location is as follows:
-#
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-#
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths
-# or URLs. Note that each tag file must have a unique name (where the name does
-# NOT include the path). If a tag file is not located in the directory in which
-# doxygen is run, you must also specify the path to the tagfile here.
-
-TAGFILES               =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE       =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS           = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS        = YES
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH              =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option also works with HAVE_DOT disabled, but it is recommended to
-# install and use dot, since it yields more powerful graphs.
-
-CLASS_DIAGRAMS         = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS   = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section
-# have no effect if this option is set to NO (the default)
-
-HAVE_DOT               = NO
-
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
-# allowed to run in parallel. When set to 0 (the default) doxygen will
-# base this on the number of processors available in the system. You can set it
-# explicitly to a value larger than 0 to get control over the balance
-# between CPU load and processing speed.
-
-DOT_NUM_THREADS        = 0
-
-# By default doxygen will use the Helvetica font for all dot files that
-# doxygen generates. When you want a differently looking font you can specify
-# the font name using DOT_FONTNAME. You need to make sure dot is able to find
-# the font, which can be done by putting it in a standard location or by setting
-# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
-# directory containing the font.
-
-DOT_FONTNAME           = Helvetica
-
-# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
-# The default size is 10pt.
-
-DOT_FONTSIZE           = 10
-
-# By default doxygen will tell dot to use the Helvetica font.
-# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
-# set the path where dot can find it.
-
-DOT_FONTPATH           =
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force the
-# CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH            = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH    = NO
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct groups dependencies
-
-GROUP_GRAPHS           = YES
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK               = NO
-
-# If the UML_LOOK tag is enabled, the fields and methods are shown inside
-# the class node. If there are many fields or methods and many nodes the
-# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
-# threshold limits the number of items for each type to make the size more
-# manageable. Set this to 0 for no limit. Note that the threshold may be
-# exceeded by 50% before the limit is enforced.
-
-UML_LIMIT_NUM_FIELDS   = 10
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS     = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH          = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH      = NO
-
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then
-# doxygen will generate a call dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable call graphs
-# for selected functions only using the \callgraph command.
-
-CALL_GRAPH             = NO
-
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
-# doxygen will generate a caller dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable caller
-# graphs for selected functions only using the \callergraph command.
-
-CALLER_GRAPH           = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will generate a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY    = YES
-
-# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH        = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are svg, png, jpg, or gif.
-# If left blank png will be used. If you choose svg you need to set
-# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
-# visible in IE 9+ (other browsers do not have this requirement).
-
-DOT_IMAGE_FORMAT       = png
-
-# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
-# enable generation of interactive SVG images that allow zooming and panning.
-# Note that this requires a modern browser other than Internet Explorer.
-# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
-# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
-# visible. Older versions of IE do not have SVG support.
-
-INTERACTIVE_SVG        = NO
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH               =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS           =
-
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the
-# \mscfile command).
-
-MSCFILE_DIRS           =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that doxygen if the
-# number of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES    = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lay further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH    = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not
-# seem to support this out of the box. Warning: Depending on the platform used,
-# enabling this option may lead to badly anti-aliased labels on the edges of
-# a graph (i.e. they become hard to read).
-
-DOT_TRANSPARENT        = NO
-
-# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS      = NO
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
-
-GENERATE_LEGEND        = YES
-
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
-# remove the intermediate dot files that are used to generate
-# the various graphs.
-
-DOT_CLEANUP            = YES
+# Doxyfile 1.o8.2
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME           = "LLVM OpenMP* Runtime Library"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify an logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc/doxygen/generated
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip. Note that you specify absolute paths here, but also
+# relative paths, which will be relative from the directory where doxygen is
+# started.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                = "other=<sup>*</sup>"
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding
+# "class=itcl::class" will allow you to use the command class in the
+# itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension,
+# and language is one of the parsers supported by doxygen: IDL, Java,
+# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C,
+# C++. For instance to make doxygen treat .inc files as Fortran files (default
+# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
+# that for custom extensions you also need to set FILE_PATTERNS otherwise the
+# files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
+# comments according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you
+# can mix doxygen, HTML, and XML commands with Markdown formatting.
+# Disable only in case of backward compatibilities issues.
+
+MARKDOWN_SUPPORT       = YES
+
+# When enabled doxygen tries to link words that correspond to documented classes,
+# or namespaces to their corresponding documentation. Such a link can be
+# prevented in individual cases by by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to
+# indicate getter and setter methods for a property. Setting this
+# option to YES (the default) will make doxygen replace the get and
+# set methods by a property in the documentation. This will only work
+# if the methods are indeed getting or setting a simple type. If this
+# is not the case, or you want to show the methods anyway, you should
+# set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
+# unions with only public data fields will be shown inline in the documentation
+# of the scope in which they are defined (i.e. file, namespace, or group
+# documentation), provided this scope is documented. If set to NO (the default),
+# structs, classes, and unions are shown on a separate page (for HTML and Man
+# pages) or section (for LaTeX and RTF).
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+SYMBOL_CACHE_SIZE      = 0
+
+# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
+# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
+# their name and scope. Since this can be an expensive process and often the
+# same symbol appear multiple times in the code, doxygen keeps a cache of
+# pre-resolved symbols. If the cache is too small doxygen will become slower.
+# If the cache is too large, memory is wasted. The cache size is given by this
+# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+# We probably will want this, but we have no file documentation yet so it's simpler to remove
+# it for now.
+SHOW_FILES             = NO
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            =
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = src  doc/doxygen/libomp_interface.h
+# The ittnotify code also has doxygen documentation, but if we include it here
+# it takes over from us!
+# src/thirdparty/ittnotify
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS          = *.c *.h *.cpp
+# We may also want to include the asm files with appropriate ifdef to ensure
+# doxygen doesn't see the content, just the documentation...
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+# Only look in the one directory.
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = src/test-touch.c
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# non of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C, C++ and Fortran comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            =
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+#  for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If left blank doxygen will
+# generate a default style sheet. Note that it is recommended to use
+# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
+# tag will in the future become obsolete.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
+# user-defined cascading style sheet that is included after the standard
+# style sheets created by doxygen. Using this option one can overrule
+# certain style aspects. This is preferred over using HTML_STYLESHEET
+# since it does not replace the standard style sheet and is therefor more
+# robust against future updates. Doxygen will copy the style sheet file to
+# the output directory.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the style sheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
+# entries shown in the various tree structured indices initially; the user
+# can expand and collapse entries dynamically later on. Doxygen will expand
+# the tree to such a level that at most the specified number of entries are
+# visible (unless a fully collapsed tree already exceeds this amount).
+# So setting the number of entries 1 will produce a full collapsed tree by
+# default. 0 is a special value representing an infinite number of entries
+# and will result in a full expanded tree by default.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely
+# identify the documentation publisher. This should be a reverse domain-name
+# style string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the main .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+#  will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
+# at top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it. Since the tabs have the same information as the
+# navigation tree you can set this option to NO if you already set
+# GENERATE_TREEVIEW to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
+# could consider to set DISABLE_INDEX to NO when enabling this option.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want to formulas look prettier in the HTML
+# output. When enabled you may also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to
+# the MathJax Content Delivery Network so you can quickly see the result without
+# installing MathJax.
+# However, it is strongly recommended to install a local
+# copy of MathJax from http://www.mathjax.org before deployment.
+
+MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension
+# names that should be enabled during MathJax rendering.
+
+MATHJAX_EXTENSIONS     =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantages are that it is more difficult to setup
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           =
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           = doc/doxygen/header.tex
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
+# http://en.wikipedia.org/wiki/BibTeX for more info.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             =
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load style sheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             =
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          =
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             = OMP_30_ENABLED=1, OMP_40_ENABLED=1, KMP_STATS_ENABLED=1
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. For each
+# tag file the location of the external documentation should be added. The
+# format of a tag file without this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths
+# or URLs. Note that each tag file must have a unique name (where the name does
+# NOT include the path). If a tag file is not located in the directory in which
+# doxygen is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS        = 0
+
+# By default doxygen will use the Helvetica font for all dot files that
+# doxygen generates. When you want a differently looking font you can specify
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find
+# the font, which can be done by putting it in a standard location or by setting
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the Helvetica font.
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
+# set the path where dot can find it.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = NO
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside
+# the class node. If there are many fields or methods and many nodes the
+# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
+# threshold limits the number of items for each type to make the size more
+# manageable. Set this to 0 for no limit. Note that the threshold may be
+# exceeded by 50% before the limit is enforced.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = NO
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used. If you choose svg you need to set
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible in IE 9+ (other browsers do not have this requirement).
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+# Note that this requires a modern browser other than Internet Explorer.
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible. Older versions of IE do not have SVG support.
+
+INTERACTIVE_SVG        = NO
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/pstl/CREDITS.txt b/pstl/CREDITS.txt
index 174722510fde..4945fd5ad308 100644
--- a/pstl/CREDITS.txt
+++ b/pstl/CREDITS.txt
@@ -1,21 +1,21 @@
-This file is a partial list of people who have contributed to the LLVM/pstl
-(Parallel STL) project.  If you have contributed a patch or made some other
-contribution to LLVM/pstl, please submit a patch to this file to add yourself,
-and it will be done!
-
-The list is sorted by surname and formatted to allow easy grepping and
-beautification by scripts.  The fields are: name (N), email (E), web-address 
-(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
-(S).
-
-N: Intel Corporation
-W: http://www.intel.com
-D: Created the initial implementation.
-
-N: Thomas Rodgers
-E: trodgers@redhat.com
-D: Identifier name transformation for inclusion in a Standard C++ library.
-
-N: Christopher Nelson
-E: nadiasvertex@gmail.com
-D: Add support for an OpenMP backend.
+This file is a partial list of people who have contributed to the LLVM/pstl
+(Parallel STL) project.  If you have contributed a patch or made some other
+contribution to LLVM/pstl, please submit a patch to this file to add yourself,
+and it will be done!
+
+The list is sorted by surname and formatted to allow easy grepping and
+beautification by scripts.  The fields are: name (N), email (E), web-address 
+(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
+(S).
+
+N: Intel Corporation
+W: http://www.intel.com
+D: Created the initial implementation.
+
+N: Thomas Rodgers
+E: trodgers@redhat.com
+D: Identifier name transformation for inclusion in a Standard C++ library.
+
+N: Christopher Nelson
+E: nadiasvertex@gmail.com
+D: Add support for an OpenMP backend.
author	Vitaly Buka <vitalybuka@google.com>	2024-10-18 16:36:18 -0700
committer	Vitaly Buka <vitalybuka@google.com>	2024-10-18 16:36:18 -0700
commit	3f695d789826bc19057a2e758e9bebfde2678fde (patch)
tree	0a159d1148f00705fad6c03b5071f9f1cc3262cf
parent	9a4661cf31ea41143ee1c5a926a75320f91b1783 (diff)
parent	f5bd36aece8f6b12422ce30903dd78d1b5006efd (diff)