diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h index d57be1990caff..bcf49c5777352 100644 --- a/clang/include/clang/Lex/PPCallbacks.h +++ b/clang/include/clang/Lex/PPCallbacks.h @@ -191,6 +191,10 @@ class PPCallbacks { StringRef Str) { } + /// Callback invoked when a \#pragma mark comment is read. + virtual void PragmaMark(SourceLocation Loc, StringRef Trivia) { + } + /// Callback invoked when a \#pragma detect_mismatch directive is /// read. virtual void PragmaDetectMismatch(SourceLocation Loc, StringRef Name, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 16fbf5ea5a5bb..2d6335471383c 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2365,7 +2365,7 @@ class Preprocessor { public: void HandlePragmaOnce(Token &OnceTok); - void HandlePragmaMark(); + void HandlePragmaMark(Token &MarkTok); void HandlePragmaPoison(); void HandlePragmaSystemHeader(Token &SysHeaderTok); void HandlePragmaDependency(Token &DependencyTok); diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h index acb26ad345d59..538556f394da4 100644 --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -166,6 +166,8 @@ __DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); } __DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); } __DEVICE__ long long llround(double __a) { return __nv_llround(__a); } __DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); } +__DEVICE__ double round(double __a) { return __nv_round(__a); } +__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); } __DEVICE__ double log(double __a) { return __nv_log(__a); } __DEVICE__ double log10(double __a) { return __nv_log10(__a); } __DEVICE__ float log10f(float __a) { return __nv_log10f(__a); } @@ -270,8 +272,6 @@ __DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) { __DEVICE__ float rnormf(int __dim, const float *__t) { return __nv_rnormf(__dim, __t); } -__DEVICE__ double round(double __a) { return __nv_round(__a); } -__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); } __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); } __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index 081b92ac21d9a..c89061ba6d02e 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -412,9 +412,13 @@ void Preprocessor::HandlePragmaOnce(Token &OnceTok) { HeaderInfo.MarkFileIncludeOnce(getCurrentFileLexer()->getFileEntry()); } -void Preprocessor::HandlePragmaMark() { +void Preprocessor::HandlePragmaMark(Token &MarkTok) { assert(CurPPLexer && "No current lexer?"); - CurLexer->ReadToEndOfLine(); + + SmallString<64> Buffer; + CurLexer->ReadToEndOfLine(&Buffer); + if (Callbacks) + Callbacks->PragmaMark(MarkTok.getLocation(), Buffer); } /// HandlePragmaPoison - Handle \#pragma GCC poison. PoisonTok is the 'poison'. 
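[Editorial aside, not part of the patch: a minimal sketch of how a tool could consume the new PragmaMark callback declared above. The class and member names below are hypothetical; registration mirrors the unit test added further down in this patch.]

#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include <memory>
#include <string>
#include <vector>

// Hypothetical observer that records the trivia text following "#pragma mark".
class MarkCollector : public clang::PPCallbacks {
public:
  std::vector<std::string> Marks;

  void PragmaMark(clang::SourceLocation Loc, llvm::StringRef Trivia) override {
    // For "#pragma mark - Lifecycle" the reported trivia is " - Lifecycle".
    Marks.push_back(Trivia.str());
  }
};

// Registration, given an already-configured clang::Preprocessor PP:
//   PP.addPPCallbacks(std::make_unique<MarkCollector>());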
@@ -992,7 +996,7 @@ struct PragmaMarkHandler : public PragmaHandler { void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &MarkTok) override { - PP.HandlePragmaMark(); + PP.HandlePragmaMark(MarkTok); } }; diff --git a/clang/unittests/Lex/PPCallbacksTest.cpp b/clang/unittests/Lex/PPCallbacksTest.cpp index 5581f9fb82f34..f92587af8dc5e 100644 --- a/clang/unittests/Lex/PPCallbacksTest.cpp +++ b/clang/unittests/Lex/PPCallbacksTest.cpp @@ -112,6 +112,20 @@ class PragmaOpenCLExtensionCallbacks : public PPCallbacks { unsigned State; }; +class PragmaMarkCallbacks : public PPCallbacks { +public: + struct Mark { + SourceLocation Location; + std::string Trivia; + }; + + std::vector<Mark> Marks; + + void PragmaMark(SourceLocation Loc, StringRef Trivia) override { + Marks.emplace_back(Mark{Loc, Trivia.str()}); + } +}; + // PPCallbacks test fixture. class PPCallbacksTest : public ::testing::Test { protected: @@ -256,6 +270,36 @@ class PPCallbacksTest : public ::testing::Test { return Callbacks->Results; } + std::vector<PragmaMarkCallbacks::Mark> + PragmaMarkCall(const char *SourceText) { + std::unique_ptr<llvm::MemoryBuffer> SourceBuf = + llvm::MemoryBuffer::getMemBuffer(SourceText, "test.c"); + SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(SourceBuf))); + + HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, + Diags, LangOpts, Target.get()); + TrivialModuleLoader ModLoader; + + Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup=*/nullptr, + /*OwnsHeaderSearch=*/false); + PP.Initialize(*Target); + + auto *Callbacks = new PragmaMarkCallbacks; + PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks)); + + // Lex source text. + PP.EnterMainSourceFile(); + while (true) { + Token Tok; + PP.Lex(Tok); + if (Tok.is(tok::eof)) + break; + } + + return Callbacks->Marks; + } + PragmaOpenCLExtensionCallbacks::CallbackParameters PragmaOpenCLExtensionCall(const char *SourceText) { LangOptions OpenCLLangOpts; @@ -424,6 +468,24 @@ TEST_F(PPCallbacksTest, OpenCLExtensionPragmaDisabled) { ASSERT_EQ(ExpectedState, Parameters.State); } +TEST_F(PPCallbacksTest, CollectMarks) { + const char *Source = + "#pragma mark\n" + "#pragma mark\r\n" + "#pragma mark - trivia\n" + "#pragma mark - trivia\r\n"; + + auto Marks = PragmaMarkCall(Source); + + ASSERT_EQ(4u, Marks.size()); + ASSERT_TRUE(Marks[0].Trivia.empty()); + ASSERT_TRUE(Marks[1].Trivia.empty()); + ASSERT_FALSE(Marks[2].Trivia.empty()); + ASSERT_FALSE(Marks[3].Trivia.empty()); + ASSERT_EQ(" - trivia", Marks[2].Trivia); + ASSERT_EQ(" - trivia", Marks[3].Trivia); +} + TEST_F(PPCallbacksTest, DirectiveExprRanges) { const auto &Results1 = DirectiveExprRange("#if FLUZZY_FLOOF\n#endif\n"); EXPECT_EQ(Results1.size(), 1U); diff --git a/compiler-rt/lib/gwp_asan/tests/backtrace.cpp b/compiler-rt/lib/gwp_asan/tests/backtrace.cpp index 95150653ff61a..4f6364891bfcc 100644 --- a/compiler-rt/lib/gwp_asan/tests/backtrace.cpp +++ b/compiler-rt/lib/gwp_asan/tests/backtrace.cpp @@ -30,7 +30,7 @@ __attribute__((optnone)) void TouchMemory(void *Ptr) { *(reinterpret_cast(Ptr)) = 7; } -TEST_F(BacktraceGuardedPoolAllocator, DoubleFree) { +TEST_F(BacktraceGuardedPoolAllocatorDeathTest, DoubleFree) { void *Ptr = AllocateMemory(GPA); DeallocateMemory(GPA, Ptr); @@ -45,7 +45,7 @@ TEST_F(BacktraceGuardedPoolAllocator, DoubleFree) { ASSERT_DEATH(DeallocateMemory2(GPA, Ptr), DeathRegex); } -TEST_F(BacktraceGuardedPoolAllocator, UseAfterFree) { +TEST_F(BacktraceGuardedPoolAllocatorDeathTest, UseAfterFree) { void *Ptr = AllocateMemory(GPA); DeallocateMemory(GPA, Ptr); diff --git 
a/compiler-rt/lib/gwp_asan/tests/enable_disable.cpp b/compiler-rt/lib/gwp_asan/tests/enable_disable.cpp index 2c6ba514f49f1..98da591c40d62 100644 --- a/compiler-rt/lib/gwp_asan/tests/enable_disable.cpp +++ b/compiler-rt/lib/gwp_asan/tests/enable_disable.cpp @@ -10,7 +10,7 @@ constexpr size_t Size = 100; -TEST_F(DefaultGuardedPoolAllocator, Fork) { +TEST_F(DefaultGuardedPoolAllocatorDeathTest, Fork) { void *P; pid_t Pid = fork(); EXPECT_GE(Pid, 0); diff --git a/compiler-rt/lib/gwp_asan/tests/harness.h b/compiler-rt/lib/gwp_asan/tests/harness.h index a61b856c6a38a..ed91e642de70e 100644 --- a/compiler-rt/lib/gwp_asan/tests/harness.h +++ b/compiler-rt/lib/gwp_asan/tests/harness.h @@ -106,4 +106,9 @@ class BacktraceGuardedPoolAllocator : public Test { gwp_asan::GuardedPoolAllocator GPA; }; +// https://github.com/google/googletest/blob/master/docs/advanced.md#death-tests-and-threads +using DefaultGuardedPoolAllocatorDeathTest = DefaultGuardedPoolAllocator; +using CustomGuardedPoolAllocatorDeathTest = CustomGuardedPoolAllocator; +using BacktraceGuardedPoolAllocatorDeathTest = BacktraceGuardedPoolAllocator; + #endif // GWP_ASAN_TESTS_HARNESS_H_ diff --git a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp index 6458e23e1423a..7a29f3c11b70f 100644 --- a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp @@ -21,7 +21,7 @@ static void initChecksum(void) { scudo::HashAlgorithm = scudo::Checksum::HardwareCRC32; } -TEST(ScudoChunkTest, ChunkBasic) { +TEST(ScudoChunkDeathTest, ChunkBasic) { initChecksum(); const scudo::uptr Size = 0x100U; scudo::Chunk::UnpackedHeader Header = {}; @@ -60,7 +60,7 @@ TEST(ScudoChunkTest, ChunkCmpXchg) { free(Block); } -TEST(ScudoChunkTest, CorruptHeader) { +TEST(ScudoChunkDeathTest, CorruptHeader) { initChecksum(); const scudo::uptr Size = 0x100U; scudo::Chunk::UnpackedHeader Header = {}; diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 6716d5df1e022..a2461c53cd950 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -103,6 +103,8 @@ template struct ScudoCombinedTest : public Test { std::unique_ptr Allocator; }; +template using ScudoCombinedDeathTest = ScudoCombinedTest; + #if SCUDO_FUCHSIA #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ @@ -166,7 +168,7 @@ void ScudoCombinedTest::BasicTest(scudo::uptr SizeLog) { } #define SCUDO_MAKE_BASIC_TEST(SizeLog) \ - SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined##SizeLog) { \ + SCUDO_TYPED_TEST(ScudoCombinedDeathTest, BasicCombined##SizeLog) { \ this->BasicTest(SizeLog); \ } @@ -314,7 +316,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeDecreasing) { Allocator->deallocate(P, Origin); } -SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateSame) { +SCUDO_TYPED_TEST(ScudoCombinedDeathTest, ReallocateSame) { auto *Allocator = this->Allocator.get(); // Check that reallocating a chunk to a slightly smaller or larger size @@ -365,7 +367,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, IterateOverChunks) { } } -SCUDO_TYPED_TEST(ScudoCombinedTest, UseAfterFree) { +SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) { auto *Allocator = this->Allocator.get(); // Check that use-after-free is detected. 
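[Editorial aside, not part of the patch: the suite renames in these test files follow the googletest rule, linked from harness.h above, that any suite using EXPECT_DEATH/ASSERT_DEATH should have a name ending in "DeathTest" so that those tests run before other tests can spawn threads. A minimal sketch of the aliasing pattern, with a hypothetical fixture name:]

#include "gtest/gtest.h"

class MyAllocatorTest : public ::testing::Test {
protected:
  volatile int *Poisoned = nullptr; // fixture state shared by both suites
};

// Re-export the same fixture under a "*DeathTest" suite name; googletest
// schedules suites with this suffix before all other tests.
using MyAllocatorDeathTest = MyAllocatorTest;

TEST_F(MyAllocatorTest, NonFatalCase) { EXPECT_EQ(Poisoned, nullptr); }

TEST_F(MyAllocatorDeathTest, DiesOnNullWrite) {
  EXPECT_DEATH({ *Poisoned = 1; }, "");
}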
@@ -392,7 +394,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, UseAfterFree) { } } -SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemoryTagging) { +SCUDO_TYPED_TEST(ScudoCombinedDeathTest, DisableMemoryTagging) { auto *Allocator = this->Allocator.get(); if (Allocator->useMemoryTaggingTestOnly()) { @@ -490,7 +492,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { // Test that multiple instantiations of the allocator have not messed up the // process's signal handlers (GWP-ASan used to do this). -TEST(ScudoCombinedTest, SKIP_ON_FUCHSIA(testSEGV)) { +TEST(ScudoCombinedDeathTest, SKIP_ON_FUCHSIA(testSEGV)) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); scudo::MapPlatformData Data = {}; void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); @@ -528,7 +530,7 @@ struct DeathConfig { template using TSDRegistryT = scudo::TSDRegistrySharedT; }; -TEST(ScudoCombinedTest, DeathCombined) { +TEST(ScudoCombinedDeathTest, DeathCombined) { using AllocatorT = TestAllocator; auto Allocator = std::unique_ptr(new AllocatorT()); diff --git a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp index 149a704e4ddde..095e1b6a5d2a0 100644 --- a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp @@ -20,7 +20,7 @@ TEST(ScudoMapTest, PageSize) { static_cast(getpagesize())); } -TEST(ScudoMapTest, MapNoAccessUnmap) { +TEST(ScudoMapDeathTest, MapNoAccessUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); scudo::MapPlatformData Data = {}; void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data); @@ -29,7 +29,7 @@ TEST(ScudoMapTest, MapNoAccessUnmap) { scudo::unmap(P, Size, UNMAP_ALL, &Data); } -TEST(ScudoMapTest, MapUnmap) { +TEST(ScudoMapDeathTest, MapUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); EXPECT_DEATH( { @@ -46,7 +46,7 @@ TEST(ScudoMapTest, MapUnmap) { ""); } -TEST(ScudoMapTest, MapWithGuardUnmap) { +TEST(ScudoMapDeathTest, MapWithGuardUnmap) { const scudo::uptr PageSize = scudo::getPageSizeCached(); const scudo::uptr Size = 4 * PageSize; scudo::MapPlatformData Data = {}; diff --git a/compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp b/compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp index 50ba0fc82cedf..72c9de36b8bd5 100644 --- a/compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp @@ -14,7 +14,7 @@ #if SCUDO_LINUX namespace scudo { -TEST(MemtagBasicTest, Unsupported) { +TEST(MemtagBasicDeathTest, Unsupported) { if (archSupportsMemoryTagging()) GTEST_SKIP(); @@ -63,6 +63,8 @@ class MemtagTest : public ::testing::Test { uptr Addr = 0; }; +using MemtagDeathTest = MemtagTest; + TEST_F(MemtagTest, ArchMemoryTagGranuleSize) { EXPECT_GT(archMemoryTagGranuleSize(), 1u); EXPECT_TRUE(isPowerOfTwo(archMemoryTagGranuleSize())); @@ -77,7 +79,7 @@ TEST_F(MemtagTest, ExtractTag) { EXPECT_EQ(0xffffull, Tags); } -TEST_F(MemtagTest, AddFixedTag) { +TEST_F(MemtagDeathTest, AddFixedTag) { for (uptr Tag = 0; Tag < 0x10; ++Tag) EXPECT_EQ(Tag, extractTag(addFixedTag(Addr, Tag))); if (SCUDO_DEBUG) { @@ -94,7 +96,7 @@ TEST_F(MemtagTest, UntagPointer) { } } -TEST_F(MemtagTest, ScopedDisableMemoryTagChecks) { +TEST_F(MemtagDeathTest, ScopedDisableMemoryTagChecks) { u8 *P = reinterpret_cast(addFixedTag(Addr, 1)); EXPECT_NE(P, Buffer); @@ -120,7 +122,7 @@ TEST_F(MemtagTest, SelectRandomTagWithMask) { } } -TEST_F(MemtagTest, SKIP_NO_DEBUG(LoadStoreTagUnaligned)) { 
+TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(LoadStoreTagUnaligned)) { for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) { if (P % archMemoryTagGranuleSize() == 0) continue; @@ -141,7 +143,7 @@ TEST_F(MemtagTest, LoadStoreTag) { loadTag(Base + archMemoryTagGranuleSize())); } -TEST_F(MemtagTest, SKIP_NO_DEBUG(StoreTagsUnaligned)) { +TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(StoreTagsUnaligned)) { for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) { uptr Tagged = addFixedTag(P, 5); if (Tagged % archMemoryTagGranuleSize() == 0) diff --git a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp index 374b6b8de3436..81587bae6b5a8 100644 --- a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp @@ -10,14 +10,14 @@ #include "report.h" -TEST(ScudoReportTest, Check) { +TEST(ScudoReportDeathTest, Check) { CHECK_LT(-1, 1); EXPECT_DEATH(CHECK_GT(-1, 1), "\\(-1\\) > \\(1\\) \\(\\(u64\\)op1=18446744073709551615, " "\\(u64\\)op2=1"); } -TEST(ScudoReportTest, Generic) { +TEST(ScudoReportDeathTest, Generic) { // Potentially unused if EXPECT_DEATH isn't defined. UNUSED void *P = reinterpret_cast(0x42424242U); EXPECT_DEATH(scudo::reportError("TEST123"), "Scudo ERROR.*TEST123"); @@ -45,7 +45,7 @@ TEST(ScudoReportTest, Generic) { "Scudo ERROR.*42424242.*123.*456"); } -TEST(ScudoReportTest, CSpecific) { +TEST(ScudoReportDeathTest, CSpecific) { EXPECT_DEATH(scudo::reportAlignmentNotPowerOfTwo(123), "Scudo ERROR.*123"); EXPECT_DEATH(scudo::reportCallocOverflow(123, 456), "Scudo ERROR.*123.*456"); EXPECT_DEATH(scudo::reportInvalidPosixMemalignAlignment(789), diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp index b82b5fb4b5158..f607ba70ab47a 100644 --- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp @@ -38,7 +38,7 @@ void *pvalloc(size_t size); static const size_t Size = 100U; -TEST(ScudoWrappersCTest, Malloc) { +TEST(ScudoWrappersCDeathTest, Malloc) { void *P = malloc(Size); EXPECT_NE(P, nullptr); EXPECT_LE(Size, malloc_usable_size(P)); @@ -154,7 +154,7 @@ TEST(ScudoWrappersCTest, AlignedAlloc) { EXPECT_EQ(errno, EINVAL); } -TEST(ScudoWrappersCTest, Realloc) { +TEST(ScudoWrappersCDeathTest, Realloc) { // realloc(nullptr, N) is malloc(N) void *P = realloc(nullptr, 0U); EXPECT_NE(P, nullptr); @@ -333,7 +333,7 @@ TEST(ScudoWrappersCTest, MallocIterateBoundary) { // Fuchsia doesn't have alarm, fork or malloc_info. #if !SCUDO_FUCHSIA -TEST(ScudoWrappersCTest, MallocDisableDeadlock) { +TEST(ScudoWrappersCDeathTest, MallocDisableDeadlock) { // We expect heap operations within a disable/enable scope to deadlock. 
EXPECT_DEATH( { @@ -368,7 +368,7 @@ TEST(ScudoWrappersCTest, MallocInfo) { free(P2); } -TEST(ScudoWrappersCTest, Fork) { +TEST(ScudoWrappersCDeathTest, Fork) { void *P; pid_t Pid = fork(); EXPECT_GE(Pid, 0) << strerror(errno); diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp index eb098c7f4d21e..114196778a973 100644 --- a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp @@ -67,7 +67,7 @@ class Pixel { Color C = Color::Red; }; -TEST(ScudoWrappersCppTest, New) { +TEST(ScudoWrappersCppDeathTest, New) { if (getenv("SKIP_TYPE_MISMATCH")) { printf("Skipped type mismatch tests.\n"); return; diff --git a/lld/test/MachO/bug_50812.s b/lld/test/MachO/bug_50812.s index 676e0e003c528..134a748a7ec2a 100644 --- a/lld/test/MachO/bug_50812.s +++ b/lld/test/MachO/bug_50812.s @@ -3,7 +3,7 @@ ## Replace this with "normal" .s test format once lld supports `-r` # RUN: yaml2obj %s -o %t.o -# RUN: %lld -lSystem -platform_version macos 11.3 11.0 -arch x86_64 %t.o +# RUN: %lld -lSystem -platform_version macos 11.3 11.0 -arch x86_64 %t.o -o %t --- !mach-o FileHeader: magic: 0xFEEDFACF diff --git a/lldb/examples/darwin/heap_find/heap.py b/lldb/examples/darwin/heap_find/heap.py index 830e851e21056..8ee44ae25e446 100644 --- a/lldb/examples/darwin/heap_find/heap.py +++ b/lldb/examples/darwin/heap_find/heap.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 #---------------------------------------------------------------------- # This module is designed to live inside the "lldb" python package diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 45f7d01bc38a2..a7b2d3b52b562 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 #---------------------------------------------------------------------- # Be sure to add the python path that points to the LLDB shared library. diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 74e069b22c67b..bf1417d4dc190 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,6 +712,8 @@ class ObjectFile : public std::enable_shared_from_this, /// false otherwise. 
bool SetModulesArchitecture(const ArchSpec &new_arch); + ConstString GetNextSyntheticSymbolName(); + static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h index be3e8abefa490..3abe3114863de 100644 --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,20 +113,14 @@ class Symbol : public SymbolContextScope { lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return GetMangled().GuessLanguage(); + return m_mangled.GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { - SynthesizeNameIfNeeded(); - return m_mangled; - } + Mangled &GetMangled() { return m_mangled; } - const Mangled &GetMangled() const { - SynthesizeNameIfNeeded(); - return m_mangled; - } + const Mangled &GetMangled() const { return m_mangled; } ConstString GetReExportedSymbolName() const; @@ -172,9 +166,9 @@ class Symbol : public SymbolContextScope { bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -229,10 +223,6 @@ class Symbol : public SymbolContextScope { bool ContainsFileAddress(lldb::addr_t file_addr) const; - static llvm::StringRef GetSyntheticSymbolPrefix() { - return "___lldb_unnamed_symbol"; - } - protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -243,8 +233,6 @@ class Symbol : public SymbolContextScope { lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; - void SynthesizeNameIfNeeded() const; - uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -270,7 +258,7 @@ class Symbol : public SymbolContextScope { // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - mutable Mangled m_mangled; // uniqued symbol name/mangled name pair + Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index e1ad0dfd2eb8d..fbfa3a5e0cec7 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,26 +219,6 @@ class Symtab { return false; } - /// A helper function that looks up full function names. - /// - /// We generate unique names for synthetic symbols so that users can look - /// them up by name when needed. But because doing so is uncommon in normal - /// debugger use, we trade off some performance at lookup time for faster - /// symbol table building by detecting these symbols and generating their - /// names lazily, rather than adding them to the normal symbol indexes. This - /// function does the job of first consulting the name indexes, and if that - /// fails it extracts the information it needs from the synthetic name and - /// locates the symbol. - /// - /// @param[in] symbol_name The symbol name to search for. - /// - /// @param[out] indexes The vector if symbol indexes to update with results. 
- /// - /// @returns The number of indexes added to the index vector. Zero if no - /// matches were found. - uint32_t GetNameIndexes(ConstString symbol_name, - std::vector &indexes); - void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index a5e86f0c2c1b7..be73d38961ea6 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,37 +2813,31 @@ Symtab *ObjectFileELF::GetSymtab() { if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - SectionSP section_sp = entry_point_addr.GetSection(); - Symbol symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/0, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + Symbol symbol(symbol_id, + GetNextSyntheticSymbolName().GetCString(), // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + entry_point_addr.GetSection(), // Section where this + // symbol is defined. + 0, // Offset in section or symbol value. + 0, // Size. + false, // Size is valid. + false, // Contains linker annotations? + 0); // Symbol flags. + m_symtab_up->AddSymbol(symbol); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression // evaluation relies on correctly setting a breakpoint at this // address. if (arch.GetMachine() == llvm::Triple::arm && - (entry_point_file_addr & 1)) { - symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); + (entry_point_file_addr & 1)) m_address_class_map[entry_point_file_addr ^ 1] = AddressClass::eCodeAlternateISA; - } else { + else m_address_class_map[entry_point_file_addr] = AddressClass::eCode; - } - m_symtab_up->AddSymbol(symbol); } } @@ -2923,24 +2917,22 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); + const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. Symbol eh_symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. 
- /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/offset, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + symbol_id, // Symbol table index. + symbol_name, // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + section_sp, // Section in which this symbol is defined or null. + offset, // Offset in section or symbol value. + 0, // Size: Don't specify the size as an FDE can + false, // Size is valid: cover multiple symbols. + false, // Contains linker annotations? + 0); // Symbol flags. new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 72389e9fd5c67..e7652cffb1c81 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,10 +4696,8 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - sym[sym_idx].GetMangled().SetDemangledName(ConstString()); + sym[sym_idx].GetMangled().SetDemangledName( + GetNextSyntheticSymbolName()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index ec320543a7828..8a1ebb98e5ba1 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -713,7 +713,8 @@ lldb::pid_t GDBRemoteCommunicationClient::GetCurrentProcessID(bool allow_lazy) { PacketResult::Success) { if (response.GetChar() == 'Q') { if (response.GetChar() == 'C') { - m_curr_pid = response.GetHexMaxU64(false, LLDB_INVALID_PROCESS_ID); + m_curr_pid_run = m_curr_pid = + response.GetHexMaxU64(false, LLDB_INVALID_PROCESS_ID); if (m_curr_pid != LLDB_INVALID_PROCESS_ID) { m_curr_pid_is_valid = eLazyBoolYes; return m_curr_pid; @@ -729,10 +730,10 @@ lldb::pid_t GDBRemoteCommunicationClient::GetCurrentProcessID(bool allow_lazy) { auto ids = GetCurrentProcessAndThreadIDs(sequence_mutex_unavailable); if (!ids.empty() && !sequence_mutex_unavailable) { // If server returned an explicit PID, use that. - m_curr_pid = ids.front().first; + m_curr_pid_run = m_curr_pid = ids.front().first; // Otherwise, use the TID of the first thread (Linux hack). 
if (m_curr_pid == LLDB_INVALID_PROCESS_ID) - m_curr_pid = ids.front().second; + m_curr_pid_run = m_curr_pid = ids.front().second; m_curr_pid_is_valid = eLazyBoolYes; return m_curr_pid; } @@ -1123,7 +1124,7 @@ bool GDBRemoteCommunicationClient::GetDefaultThreadId(lldb::tid_t &tid) { // if we get pid as well, update m_curr_pid if (pid != 0) { - m_curr_pid = pid; + m_curr_pid_run = m_curr_pid = pid; m_curr_pid_is_valid = eLazyBoolYes; } tid = pid_tid->second; @@ -2137,7 +2138,7 @@ bool GDBRemoteCommunicationClient::GetCurrentProcessInfo(bool allow_lazy) { m_qProcessInfo_is_valid = eLazyBoolYes; if (pid != LLDB_INVALID_PROCESS_ID) { m_curr_pid_is_valid = eLazyBoolYes; - m_curr_pid = pid; + m_curr_pid_run = m_curr_pid = pid; } // Set the ArchSpec from the triple if we have it. @@ -2639,21 +2640,30 @@ bool GDBRemoteCommunicationClient::KillSpawnedProcess(lldb::pid_t pid) { return false; } -llvm::Optional +llvm::Optional GDBRemoteCommunicationClient::SendSetCurrentThreadPacket(uint64_t tid, + uint64_t pid, char op) { lldb_private::StreamString packet; packet.PutChar('H'); packet.PutChar(op); + + if (pid != LLDB_INVALID_PROCESS_ID) { + packet.PutChar('p'); + packet.PutHex64(pid); + packet.PutChar('.'); + } + if (tid == UINT64_MAX) packet.PutCString("-1"); else packet.PutHex64(tid); + StringExtractorGDBRemote response; if (SendPacketAndWaitForResponse(packet.GetString(), response, false) == PacketResult::Success) { if (response.IsOKResponse()) - return tid; + return {{pid, tid}}; /* * Connected bare-iron target (like YAMON gdb-stub) may not have support for @@ -2663,28 +2673,38 @@ GDBRemoteCommunicationClient::SendSetCurrentThreadPacket(uint64_t tid, * give us pid and/or tid. Assume pid=tid=1 in such cases. */ if (response.IsUnsupportedResponse() && IsConnected()) - return 1; + return {{1, 1}}; } return llvm::None; } -bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid) { - if (m_curr_tid == tid) +bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid, + uint64_t pid) { + if (m_curr_tid == tid && + (m_curr_pid == pid || LLDB_INVALID_PROCESS_ID == pid)) return true; - llvm::Optional ret = SendSetCurrentThreadPacket(tid, 'g'); - if (ret.hasValue()) - m_curr_tid = ret.getValue(); + llvm::Optional ret = SendSetCurrentThreadPacket(tid, pid, 'g'); + if (ret.hasValue()) { + if (ret->pid != LLDB_INVALID_PROCESS_ID) + m_curr_pid = ret->pid; + m_curr_tid = ret->tid; + } return ret.hasValue(); } -bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid) { - if (m_curr_tid_run == tid) +bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid, + uint64_t pid) { + if (m_curr_tid_run == tid && + (m_curr_pid_run == pid || LLDB_INVALID_PROCESS_ID == pid)) return true; - llvm::Optional ret = SendSetCurrentThreadPacket(tid, 'c'); - if (ret.hasValue()) - m_curr_tid_run = ret.getValue(); + llvm::Optional ret = SendSetCurrentThreadPacket(tid, pid, 'c'); + if (ret.hasValue()) { + if (ret->pid != LLDB_INVALID_PROCESS_ID) + m_curr_pid_run = ret->pid; + m_curr_tid_run = ret->tid; + } return ret.hasValue(); } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 03704dfdd8cf0..4fe05a8a4e54c 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -49,6 +49,12 @@ inline bool operator==(const QOffsets &a, const QOffsets &b) { } llvm::raw_ostream 
&operator<<(llvm::raw_ostream &os, const QOffsets &offsets); +// A trivial struct used to return a pair of PID and TID. +struct PidTid { + uint64_t pid; + uint64_t tid; +}; + class GDBRemoteCommunicationClient : public GDBRemoteClientBase { public: GDBRemoteCommunicationClient(); @@ -336,11 +342,14 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { // and response times. bool SendSpeedTestPacket(uint32_t send_size, uint32_t recv_size); - llvm::Optional SendSetCurrentThreadPacket(uint64_t tid, char op); + llvm::Optional + SendSetCurrentThreadPacket(uint64_t tid, uint64_t pid, char op); - bool SetCurrentThread(uint64_t tid); + bool SetCurrentThread(uint64_t tid, + lldb::pid_t pid = LLDB_INVALID_PROCESS_ID); - bool SetCurrentThreadForRun(uint64_t tid); + bool SetCurrentThreadForRun(uint64_t tid, + lldb::pid_t pid = LLDB_INVALID_PROCESS_ID); bool GetQXferAuxvReadSupported(); @@ -576,13 +585,14 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { m_supports_qModuleInfo : 1, m_supports_jThreadsInfo : 1, m_supports_jModulesInfo : 1; + /// Current gdb remote protocol process identifier for all other operations lldb::pid_t m_curr_pid = LLDB_INVALID_PROCESS_ID; - lldb::tid_t m_curr_tid = - LLDB_INVALID_THREAD_ID; // Current gdb remote protocol thread index for - // all other operations - lldb::tid_t m_curr_tid_run = - LLDB_INVALID_THREAD_ID; // Current gdb remote protocol thread index for - // continue, step, etc + /// Current gdb remote protocol process identifier for continue, step, etc + lldb::pid_t m_curr_pid_run = LLDB_INVALID_PROCESS_ID; + /// Current gdb remote protocol thread identifier for all other operations + lldb::tid_t m_curr_tid = LLDB_INVALID_THREAD_ID; + /// Current gdb remote protocol thread identifier for continue, step, etc + lldb::tid_t m_curr_tid_run = LLDB_INVALID_THREAD_ID; uint32_t m_num_supported_hardware_watchpoints = 0; uint32_t m_addressing_bits = 0; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 101af01341a20..b0fdd50b3c0f1 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,6 +616,16 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, return symbol_type_hint; } +ConstString ObjectFile::GetNextSyntheticSymbolName() { + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + ConstString file_name = GetModule()->GetFileSpec().GetFilename(); + ++m_synthetic_symbol_idx; + os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" + << file_name.GetStringRef(); + return ConstString(os.str()); +} + std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index b24372795ad55..a25911d1734da 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type, m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const { } ConstString Symbol::GetDisplayName() const { - return 
GetMangled().GetDisplayDemangledName(); + return m_mangled.GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level, s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = GetMangled().GetDemangledName(); + ConstString demangled = m_mangled.GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index, // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = GetMangled().GetName(name_preference); + ConstString name = m_mangled.GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,11 +330,9 @@ uint32_t Symbol::GetPrologueByteSize() { } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) { - const Mangled &mangled = GetMangled(); - return mangled.GetMangledName() == name || - mangled.GetDemangledName() == name; - } + if (type == eSymbolTypeAny || m_type == type) + return m_mangled.GetMangledName() == name || + m_mangled.GetDemangledName() == name; return false; } @@ -497,10 +495,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const { return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return GetMangled().GetName(); } +ConstString Symbol::GetName() const { return m_mangled.GetName(); } ConstString Symbol::GetNameNoArguments() const { - return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); + return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -567,21 +565,3 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } - -void Symbol::SynthesizeNameIfNeeded() const { - if (m_is_synthetic && !m_mangled) { - // Synthetic symbol names don't mean anything, but they do uniquely - // identify individual symbols so we give them a unique name. The name - // starts with the synthetic symbol prefix, followed by a unique number. - // Typically the UserID of a real symbol is the symbol table index of the - // symbol in the object file's symbol table(s), so it will be the same - // every time you read in the object file. We want the same persistence for - // synthetic symbols so that users can identify them across multiple debug - // sessions, to understand crashes in those symbols and to reliably set - // breakpoints on them. - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - os << GetSyntheticSymbolPrefix() << GetID(); - m_mangled.SetDemangledName(ConstString(os.str())); - } -} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 8b4f1d953af3a..85ece8d6d875a 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ void Symtab::InitNameIndexes() { // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. 
- if (symbol->IsTrampoline() || symbol->IsSynthetic()) + if (symbol->IsTrampoline()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,36 +628,6 @@ void Symtab::SortSymbolIndexesByValue(std::vector &indexes, } } -uint32_t Symtab::GetNameIndexes(ConstString symbol_name, - std::vector &indexes) { - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - const uint32_t count = name_to_index.GetValues(symbol_name, indexes); - if (count) - return count; - // Synthetic symbol names are not added to the name indexes, but they start - // with a prefix and end with a the symbol UserID. This allows users to find - // these symbols without having to add them to the name indexes. These - // queries will not happen very often since the names don't mean anything, so - // performance is not paramount in this case. - llvm::StringRef name = symbol_name.GetStringRef(); - // String the synthetic prefix if the name starts with it. - if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) - return 0; // Not a synthetic symbol name - - // Extract the user ID from the symbol name - unsigned long long uid = 0; - if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) - return 0; // Failed to extract the user ID as an integer - Symbol *symbol = FindSymbolByID(uid); - if (symbol == nullptr) - return 0; - const uint32_t symbol_idx = GetIndexForSymbol(symbol); - if (symbol_idx == UINT32_MAX) - return 0; - indexes.push_back(symbol_idx); - return 1; -} - uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -667,7 +637,8 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - return GetNameIndexes(symbol_name, indexes); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + return name_to_index.GetValues(symbol_name, indexes); } return 0; } @@ -684,9 +655,10 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - GetNameIndexes(symbol_name, all_name_indexes); + name_to_index.GetValues(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) diff --git a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py index 0a983436462ae..5dc43a5a10935 100644 --- a/lldb/test/API/macosx/function-starts/TestFunctionStarts.py +++ b/lldb/test/API/macosx/function-starts/TestFunctionStarts.py @@ -81,7 +81,6 @@ def do_function_starts(self, in_memory): self.assertTrue(thread.num_frames > 1, "Couldn't backtrace.") name = thread.frame[1].GetFunctionName() self.assertTrue(name.startswith("___lldb_unnamed_symbol")) - self.assertTrue(name.endswith("$$StripMe")) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml index 0dcc9fb76bd4f..6178a45de1b59 100644 --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 
0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index 788dafe248d50..1eb03fa43deb0 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d87f791077e51..98a18e8d093d3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5102,22 +5102,21 @@ metadata nodes are related to debug info. DICompileUnit """"""""""""" -``DICompileUnit`` nodes represent a compile unit. ``DICompileUnit`` nodes must -be ``distinct``. The ``enums:``, ``retainedTypes:``, ``globals:``, ``imports:`` -and ``macros:`` fields are tuples containing the debug info to be emitted along -with the compile unit, regardless of code optimizations (some nodes are only -emitted if there are references to them from instructions). The -``debugInfoForProfiling:`` field is a boolean indicating whether or not -line-table discriminators are updated to provide more-accurate debug info for -profiling results. +``DICompileUnit`` nodes represent a compile unit. The ``enums:``, +``retainedTypes:``, ``globals:``, ``imports:`` and ``macros:`` fields are tuples +containing the debug info to be emitted along with the compile unit, regardless +of code optimizations (some nodes are only emitted if there are references to +them from instructions). The ``debugInfoForProfiling:`` field is a boolean +indicating whether or not line-table discriminators are updated to provide +more-accurate debug info for profiling results. .. code-block:: text - !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", - isOptimized: true, flags: "-O2", runtimeVersion: 2, - splitDebugFilename: "abc.debug", emissionKind: FullDebug, - enums: !2, retainedTypes: !3, globals: !4, imports: !5, - macros: !6, dwoId: 0x0abcd) + !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", + isOptimized: true, flags: "-O2", runtimeVersion: 2, + splitDebugFilename: "abc.debug", emissionKind: FullDebug, + enums: !2, retainedTypes: !3, globals: !4, imports: !5, + macros: !6, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a specific compilation unit. File descriptors are defined using this scope. These @@ -5528,14 +5527,12 @@ DIExpression """""""""""" ``DIExpression`` nodes represent expressions that are inspired by the DWARF -expression language. 
``DIExpression`` nodes must not be ``distinct``, and are -canonically printed inline at each use. They are used in :ref:`debug -intrinsics` (such as ``llvm.dbg.declare`` and -``llvm.dbg.value``) to describe how the referenced LLVM variable relates to the -source language variable. Debug intrinsics are interpreted left-to-right: start -by pushing the value/address operand of the intrinsic onto a stack, then -repeatedly push and evaluate opcodes from the DIExpression until the final -variable description is produced. +expression language. They are used in :ref:`debug intrinsics` +(such as ``llvm.dbg.declare`` and ``llvm.dbg.value``) to describe how the +referenced LLVM variable relates to the source language variable. Debug +intrinsics are interpreted left-to-right: start by pushing the value/address +operand of the intrinsic onto a stack, then repeatedly push and evaluate +opcodes from the DIExpression until the final variable description is produced. The current supported opcode vocabulary is limited: @@ -5613,23 +5610,23 @@ The current supported opcode vocabulary is limited: IR for "*ptr = 4;" -------------- - call void @llvm.dbg.value(metadata i32 4, metadata !17, - metadata !DIExpression(DW_OP_LLVM_implicit_pointer))) + call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !20) !17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5, type: !18) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !20 = !DIExpression(DW_OP_LLVM_implicit_pointer)) IR for "**ptr = 4;" -------------- - call void @llvm.dbg.value(metadata i32 4, metadata !17, - metadata !DIExpression(DW_OP_LLVM_implicit_pointer, - DW_OP_LLVM_implicit_pointer))) + call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !21) !17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5, type: !18) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) !20 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !21 = !DIExpression(DW_OP_LLVM_implicit_pointer, + DW_OP_LLVM_implicit_pointer)) DWARF specifies three kinds of simple location descriptions: Register, memory, and implicit location descriptions. Note that a location description is @@ -5670,13 +5667,12 @@ valid debug intrinsic. DIArgList """""""""""" -``DIArgList`` nodes hold a list of constant or SSA value references. -``DIArgList`` must not be ``distinct``, must only be used as an argument to a -function call, and must appear inline at each use. ``DIArgList`` may refer to -function-local values of the containing function. ``DIArgList`` nodes are used -in :ref:`debug intrinsics` (currently only in +``DIArgList`` nodes hold a list of constant or SSA value references. These are +used in :ref:`debug intrinsics` (currently only in ``llvm.dbg.value``) in combination with a ``DIExpression`` that uses the -``DW_OP_LLVM_arg`` operator. +``DW_OP_LLVM_arg`` operator. Because a DIArgList may refer to local values +within a function, it must only be used as a function argument, must always be +inlined, and cannot appear in named metadata. .. 
code-block:: text diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index 69b7f35fc7b87..652eccb33f077 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -291,17 +291,17 @@ Compiled to LLVM, this function would be represented like this: %X = alloca i32, align 4 %Y = alloca i32, align 4 %Z = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !DIExpression()), !dbg !13 - store i32 21, i32* %X, align 4, !dbg !13 - call void @llvm.dbg.declare(metadata i32* %Y, metadata !14, metadata !DIExpression()), !dbg !15 - store i32 22, i32* %Y, align 4, !dbg !15 - call void @llvm.dbg.declare(metadata i32* %Z, metadata !16, metadata !DIExpression()), !dbg !18 - store i32 23, i32* %Z, align 4, !dbg !18 - %0 = load i32, i32* %X, align 4, !dbg !19 - store i32 %0, i32* %Z, align 4, !dbg !20 - %1 = load i32, i32* %Y, align 4, !dbg !21 - store i32 %1, i32* %X, align 4, !dbg !22 - ret void, !dbg !23 + call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14 + store i32 21, i32* %X, align 4, !dbg !14 + call void @llvm.dbg.declare(metadata i32* %Y, metadata !15, metadata !13), !dbg !16 + store i32 22, i32* %Y, align 4, !dbg !16 + call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19 + store i32 23, i32* %Z, align 4, !dbg !19 + %0 = load i32, i32* %X, align 4, !dbg !20 + store i32 %0, i32* %Z, align 4, !dbg !21 + %1 = load i32, i32* %Y, align 4, !dbg !22 + store i32 %1, i32* %X, align 4, !dbg !23 + ret void, !dbg !24 } ; Function Attrs: nounwind readnone @@ -327,17 +327,18 @@ Compiled to LLVM, this function would be represented like this: !10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} !11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) !12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !13 = !DILocation(line: 2, column: 9, scope: !4) - !14 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) - !15 = !DILocation(line: 3, column: 9, scope: !4) - !16 = !DILocalVariable(name: "Z", scope: !17, file: !1, line: 5, type: !12) - !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !18 = !DILocation(line: 5, column: 11, scope: !17) - !19 = !DILocation(line: 6, column: 11, scope: !17) - !20 = !DILocation(line: 6, column: 9, scope: !17) - !21 = !DILocation(line: 8, column: 9, scope: !4) - !22 = !DILocation(line: 8, column: 7, scope: !4) - !23 = !DILocation(line: 9, column: 3, scope: !4) + !13 = !DIExpression() + !14 = !DILocation(line: 2, column: 9, scope: !4) + !15 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) + !16 = !DILocation(line: 3, column: 9, scope: !4) + !17 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) + !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) + !19 = !DILocation(line: 5, column: 11, scope: !18) + !20 = !DILocation(line: 6, column: 11, scope: !18) + !21 = !DILocation(line: 6, column: 9, scope: !18) + !22 = !DILocation(line: 8, column: 9, scope: !4) + !23 = !DILocation(line: 8, column: 7, scope: !4) + !24 = !DILocation(line: 9, column: 3, scope: !4) This example illustrates a few important details about LLVM debugging @@ -348,21 +349,21 @@ variable definitions, and the code used to implement the function. .. 
code-block:: llvm - call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14 ; [debug line = 2:7] [debug variable = X] The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the -variable ``X``. The metadata ``!dbg !13`` attached to the intrinsic provides +variable ``X``. The metadata ``!dbg !14`` attached to the intrinsic provides scope information for the variable ``X``. .. code-block:: text - !13 = !DILocation(line: 2, column: 9, scope: !4) + !14 = !DILocation(line: 2, column: 9, scope: !4) !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2) -Here ``!13`` is metadata providing `location information +Here ``!14`` is metadata providing `location information `_. In this example, scope is encoded by ``!4``, a `subprogram descriptor `_. This way the location information attached to the intrinsics indicates that the variable ``X`` is @@ -372,20 +373,20 @@ Now lets take another example. .. code-block:: llvm - call void @llvm.dbg.declare(metadata i32* %Z, metadata !16, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19 ; [debug line = 5:9] [debug variable = Z] The third intrinsic ``%llvm.dbg.declare`` encodes debugging information for -variable ``Z``. The metadata ``!dbg !18`` attached to the intrinsic provides +variable ``Z``. The metadata ``!dbg !19`` attached to the intrinsic provides scope information for the variable ``Z``. .. code-block:: text - !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !18 = !DILocation(line: 5, column: 11, scope: !17) + !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) + !19 = !DILocation(line: 5, column: 11, scope: !18) -Here ``!18`` indicates that ``Z`` is declared at line number 5 and column -number 11 inside of lexical scope ``!17``. The lexical scope itself resides +Here ``!19`` indicates that ``Z`` is declared at line number 5 and column +number 11 inside of lexical scope ``!18``. The lexical scope itself resides inside of subprogram ``!4`` described above. 
The scope information attached with each instruction provides a straightforward @@ -799,14 +800,14 @@ presents several difficulties: br label %exit, !dbg !26 truebr: - call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !23 - call void @llvm.dbg.value(metadata i32 1, metadata !22, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 1, metadata !23, metadata !DIExpression()), !dbg !24 %value1 = add i32 %input, 1 br label %bb1 falsebr: - call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !23 - call void @llvm.dbg.value(metadata i32 2, metadata !22, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 2, metadata !23, metadata !DIExpression()), !dbg !24 %value = add i32 %input, 2 br label %bb1 @@ -817,7 +818,7 @@ presents several difficulties: Here the difficulties are: * The control flow is roughly the opposite of basic block order -* The value of the ``!22`` variable merges into ``%bb1``, but there is no PHI +* The value of the ``!23`` variable merges into ``%bb1``, but there is no PHI node As mentioned above, the ``llvm.dbg.value`` intrinsics essentially form an @@ -830,9 +831,9 @@ location, which would lead to a large number of debugging intrinsics being generated. Examining the example above, variable ``!30`` is assigned ``%input`` on both -conditional paths through the function, while ``!22`` is assigned differing +conditional paths through the function, while ``!23`` is assigned differing constant values on either path. Where control flow merges in ``%bb1`` we would -want ``!30`` to keep its location (``%input``), but ``!22`` to become undefined +want ``!30`` to keep its location (``%input``), but ``!23`` to become undefined as we cannot determine at runtime what value it should have in %bb1 without inserting a PHI node. mem2reg does not insert the PHI node to avoid changing codegen when debugging is enabled, and does not insert the other dbg.values @@ -851,7 +852,7 @@ DbgEntityHistoryCalculator) to build a map of each instruction to every valid variable location, without the need to consider control flow. From the example above, it is otherwise difficult to determine that the location of variable ``!30`` should flow "up" into block ``%bb1``, but that the location -of variable ``!22`` should not flow "down" into the ``%exit`` block. +of variable ``!23`` should not flow "down" into the ``%exit`` block. .. 
_ccxx_frontend: diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 8b26b65528972..e12278d21b0ca 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -533,8 +533,7 @@ namespace llvm { template bool parseMDFieldsImplBody(ParserTy ParseField); template bool parseMDFieldsImpl(ParserTy ParseField, LocTy &ClosingLoc); - bool parseSpecializedMDNode(MDNode *&N, bool IsDistinct = false, - LocTy DistinctLoc = LocTy()); + bool parseSpecializedMDNode(MDNode *&N, bool IsDistinct = false); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \ bool parse##CLASS(MDNode *&Result, bool IsDistinct); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 2c8b6b14a67ec..0988fe793176d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -150,11 +150,11 @@ class CombinerHelper { void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); bool matchSextTruncSextLoad(MachineInstr &MI); - bool applySextTruncSextLoad(MachineInstr &MI); + void applySextTruncSextLoad(MachineInstr &MI); /// Match sext_inreg(load p), imm -> sextload p bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); - bool applySextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); + void applySextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); /// Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM /// when their source operands are identical. @@ -239,87 +239,87 @@ class CombinerHelper { bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); - bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); + void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); /// Fold (shift (shift base, x), y) -> (shift base (x+y)) bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); - bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); /// If we have a shift-by-constant of a bitwise logic op that itself has a /// shift-by-constant operand with identical opcode, we may be able to convert /// that into 2 independent shifts followed by the logic op. bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo); - bool applyShiftOfShiftedLogic(MachineInstr &MI, + void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo); /// Transform a multiply by a power-of-2 value to a left shift. bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); - bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); + void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); // Transform a G_SHL with an extended source into a narrower shift if // possible. bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData); - bool applyCombineShlOfExtend(MachineInstr &MI, + void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData); /// Reduce a shift by a constant to an unmerge and a shift on a half sized /// type. This will not produce a shift smaller than \p TargetShiftSize. 
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal); - bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal); + void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal); bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount); /// Transform G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z. bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl &Operands); - bool + void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl &Operands); /// Transform G_UNMERGE Constant -> Constant1, Constant2, ... bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl &Csts); - bool applyCombineUnmergeConstant(MachineInstr &MI, + void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl &Csts); /// Transform X, Y = G_UNMERGE Z -> X = G_TRUNC Z. bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); - bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0 bool matchCombineUnmergeZExtToZExt(MachineInstr &MI); - bool applyCombineUnmergeZExtToZExt(MachineInstr &MI); + void applyCombineUnmergeZExtToZExt(MachineInstr &MI); /// Transform fp_instr(cst) to constant result of the fp operation. bool matchCombineConstantFoldFpUnary(MachineInstr &MI, Optional &Cst); - bool applyCombineConstantFoldFpUnary(MachineInstr &MI, + void applyCombineConstantFoldFpUnary(MachineInstr &MI, Optional &Cst); /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); - bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); + void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); /// Transform PtrToInt(IntToPtr(x)) to x. bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg); - bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg); + void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg); /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y) bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair &PtrRegAndCommute); - bool applyCombineAddP2IToPtrAdd(MachineInstr &MI, + void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair &PtrRegAndCommute); // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2 bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); - bool applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); + void applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); /// Transform anyext(trunc(x)) to x. bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); - bool applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); + void applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); /// Transform zext(trunc(x)) to x. bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg); @@ -327,7 +327,7 @@ class CombinerHelper { /// Transform [asz]ext([asz]ext(x)) to [asz]ext x. bool matchCombineExtOfExt(MachineInstr &MI, std::tuple &MatchInfo); - bool applyCombineExtOfExt(MachineInstr &MI, + void applyCombineExtOfExt(MachineInstr &MI, std::tuple &MatchInfo); /// Transform fneg(fneg(x)) to x. @@ -335,23 +335,23 @@ class CombinerHelper { /// Match fabs(fabs(x)) to fabs(x). 
bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); - bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). bool matchCombineTruncOfExt(MachineInstr &MI, std::pair &MatchInfo); - bool applyCombineTruncOfExt(MachineInstr &MI, + void applyCombineTruncOfExt(MachineInstr &MI, std::pair &MatchInfo); /// Transform trunc (shl x, K) to shl (trunc x), /// K => K < VT.getScalarSizeInBits(). bool matchCombineTruncOfShl(MachineInstr &MI, std::pair &MatchInfo); - bool applyCombineTruncOfShl(MachineInstr &MI, + void applyCombineTruncOfShl(MachineInstr &MI, std::pair &MatchInfo); /// Transform G_MUL(x, -1) to G_SUB(0, x) - bool applyCombineMulByNegativeOne(MachineInstr &MI); + void applyCombineMulByNegativeOne(MachineInstr &MI); /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. @@ -418,7 +418,7 @@ class CombinerHelper { /// Return true if MI is a G_ADD which can be simplified to a G_SUB. bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple &MatchInfo); - bool applySimplifyAddToSub(MachineInstr &MI, + void applySimplifyAddToSub(MachineInstr &MI, std::tuple &MatchInfo); /// Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y)) @@ -427,13 +427,13 @@ class CombinerHelper { InstructionStepsMatchInfo &MatchInfo); /// Replace \p MI with a series of instructions described in \p MatchInfo. - bool applyBuildInstructionSteps(MachineInstr &MI, + void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo); /// Match ashr (shl x, C), C -> sext_inreg (C) bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple &MatchInfo); - bool applyAshShlToSextInreg(MachineInstr &MI, + void applyAshShlToSextInreg(MachineInstr &MI, std::tuple &MatchInfo); /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 @@ -462,27 +462,27 @@ class CombinerHelper { /// Combine inverting a result of a compare into the opposite cond code. bool matchNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); - bool applyNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); + void applyNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); /// Fold (xor (and x, y), y) -> (and (not x), y) ///{ bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair &MatchInfo); - bool applyXorOfAndWithSameReg(MachineInstr &MI, + void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair &MatchInfo); ///} /// Combine G_PTR_ADD with nullptr to G_INTTOPTR bool matchPtrAddZero(MachineInstr &MI); - bool applyPtrAddZero(MachineInstr &MI); + void applyPtrAddZero(MachineInstr &MI); /// Combine G_UREM x, (known power of 2) to an add and bitmasking. 
- bool applySimplifyURemByPow2(MachineInstr &MI); + void applySimplifyURemByPow2(MachineInstr &MI); bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl &MatchInfo); - bool applyCombineInsertVecElts(MachineInstr &MI, + void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl &MatchInfo); /// Match expression trees of the form @@ -498,7 +498,7 @@ class CombinerHelper { std::function &MatchInfo); bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); - bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); + void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); @@ -511,8 +511,14 @@ class CombinerHelper { SmallVectorImpl> &MatchInfo); /// Use a function which takes in a MachineIRBuilder to perform a combine. - bool applyBuildFn(MachineInstr &MI, + /// By default, it erases the instruction \p MI from the function. + void applyBuildFn(MachineInstr &MI, std::function &MatchInfo); + /// Use a function which takes in a MachineIRBuilder to perform a combine. + /// This variant does not erase \p MI after calling the build function. + void applyBuildFnNoErase(MachineInstr &MI, + std::function &MatchInfo); + bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); bool matchRotateOutOfRange(MachineInstr &MI); @@ -528,6 +534,11 @@ class CombinerHelper { bool matchBitfieldExtractFromAnd( MachineInstr &MI, std::function &MatchInfo); + /// Reassociate pointer calculations with G_ADD involved, to allow better + /// addressing mode usage. + bool matchReassocPtrAdd(MachineInstr &MI, + std::function &MatchInfo); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); @@ -589,6 +600,11 @@ class CombinerHelper { SmallDenseMap &MemOffset2Idx, const SmallVector &RegsToVisit, const unsigned MemSizeInBits); + + /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing + /// a re-association of its operands would break an existing legal addressing + /// mode that the address computation currently represents. + bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index ca3decfc6a8f1..5e12da6791add 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1316,10 +1316,10 @@ class SelectionDAG { SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); - SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, + SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy); - SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, + SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating = false); diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index fdef80724de52..bbf349e6b508c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -8,39 +8,12 @@ // // Macros for running through all types of metadata. 
// -// Definitions for terms used to describe metadata include: -// -// * BRANCH: refers to an "abstract" metadata kind, which exists only in the -// C++ class hierarchy. These cannot appear directly in IR/bitcode. -// * LEAF: refers to a "concrete" metadata kind. These can appear directly in -// IR/bitcode. -// * SPECIALIZED: refers to non-MDTuple MDNodes, i.e. those that use the -// syntax "!CLASS(...)" in IR. -// * UNIQUABLE: refers to nodes which can use uniqued, distinct, or temporary -// storage without any restrictions. -// * UNIQUED: refers to nodes which must use uniqued or temporary storage. -// * DISTINCT: refers to nodes which must use distinct or temporary storage. -// -// In LLVM IR, UNIQUABLE and DISTINCT nodes must be referred to by MDNode ID, -// as in `!0`, whereas UNIQUED nodes canonically appear inline at each use, as -// in `DIExpression(...)`. This is because `distinct` nodes maintain their -// identity irrespective of contents, making the inline syntax ambiguous in -// some cases. -// -// Note: UNIQUABLE, UNIQUED, and DISTINCT are mutually exclusive. For example, -// code which intends to consider all nodes which can use uniqued storage must -// consider both UNIQUABLE and UNIQUED nodes. -// //===----------------------------------------------------------------------===// #if !(defined HANDLE_METADATA || defined HANDLE_METADATA_LEAF || \ defined HANDLE_METADATA_BRANCH || defined HANDLE_MDNODE_LEAF || \ - defined HANDLE_MDNODE_LEAF_UNIQUABLE || \ - defined HANDLE_MDNODE_LEAF_UNIQUED || \ - defined HANDLE_MDNODE_LEAF_DISTINCT || defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_MDNODE_LEAF_UNIQUABLE || defined HANDLE_MDNODE_BRANCH || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE || \ - defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED || \ - defined HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF || \ defined HANDLE_SPECIALIZED_MDNODE_BRANCH) #error "Missing macro definition of HANDLE_METADATA*" @@ -61,7 +34,7 @@ #define HANDLE_METADATA_BRANCH(CLASS) HANDLE_METADATA(CLASS) #endif -// Handler for specialized and uniquable leaf nodes under MDNode. Defers to +// Handler for specialized and uniquable leaf nodes under MDNode. Defers to // HANDLE_MDNODE_LEAF_UNIQUABLE if it's defined, otherwise to // HANDLE_SPECIALIZED_MDNODE_LEAF. #ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE @@ -74,47 +47,11 @@ #endif #endif -// Handler for specialized and always-uniqued leaf nodes under MDNode. Defers to -// HANDLE_MDNODE_LEAF_UNIQUED if it's defined, otherwise to -// HANDLE_SPECIALIZED_MDNODE_LEAF. -#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED -#ifdef HANDLE_MDNODE_LEAF_UNIQUED -#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ - HANDLE_MDNODE_LEAF_UNIQUED(CLASS) -#else -#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ - HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) -#endif -#endif - -// Handler for specialized and always-distinct leaf nodes under MDNode. Defers -// to HANDLE_MDNODE_LEAF_DISTINCT if it's defined, otherwise to -// HANDLE_SPECIALIZED_MDNODE_LEAF. -#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT -#ifdef HANDLE_MDNODE_LEAF_DISTINCT -#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ - HANDLE_MDNODE_LEAF_DISTINCT(CLASS) -#else -#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ - HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) -#endif -#endif - -// Handler for uniquable leaf nodes under MDNode. +// Handler for leaf nodes under MDNode. 
#ifndef HANDLE_MDNODE_LEAF_UNIQUABLE #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) HANDLE_MDNODE_LEAF(CLASS) #endif -// Handler for uniqued leaf nodes under MDNode. -#ifndef HANDLE_MDNODE_LEAF_UNIQUED -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF(CLASS) -#endif - -// Handler for distinct leaf nodes under MDNode. -#ifndef HANDLE_MDNODE_LEAF_DISTINCT -#define HANDLE_MDNODE_LEAF_DISTINCT(CLASS) HANDLE_MDNODE_LEAF(CLASS) -#endif - // Handler for leaf nodes under MDNode. #ifndef HANDLE_MDNODE_LEAF #define HANDLE_MDNODE_LEAF(CLASS) HANDLE_METADATA_LEAF(CLASS) @@ -143,7 +80,7 @@ HANDLE_METADATA_LEAF(DistinctMDOperandPlaceholder) HANDLE_MDNODE_BRANCH(MDNode) HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation) -HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(DIExpression) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariableExpression) HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode) @@ -156,7 +93,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIDerivedType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICompositeType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubroutineType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFile) -HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(DICompileUnit) +HANDLE_SPECIALIZED_MDNODE_LEAF(DICompileUnit) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILocalScope) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubprogram) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase) @@ -177,7 +114,7 @@ HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock) -HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(DIArgList) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIArgList) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIStringType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) @@ -186,11 +123,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) #undef HANDLE_METADATA_BRANCH #undef HANDLE_MDNODE_LEAF #undef HANDLE_MDNODE_LEAF_UNIQUABLE -#undef HANDLE_MDNODE_LEAF_UNIQUED -#undef HANDLE_MDNODE_LEAF_DISTINCT #undef HANDLE_MDNODE_BRANCH #undef HANDLE_SPECIALIZED_MDNODE_LEAF #undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE -#undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED -#undef HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT #undef HANDLE_SPECIALIZED_MDNODE_BRANCH diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index a01d9b26f351c..842a382e4c902 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -142,7 +142,7 @@ def sext_inreg_of_load : GICombineRule< (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo), (match (wip_match_opcode G_SEXT_INREG):$root, [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>; + (apply [{ Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>; def combine_indexed_load_store : GICombineRule< (defs root:$root, indexed_load_store_matchdata:$matchinfo), @@ -307,7 +307,7 @@ def urem_pow2_to_mask : GICombineRule< (defs root:$root), (match (wip_match_opcode G_UREM):$root, [{ return Helper.matchOperandIsKnownToBeAPowerOfTwo(*${root}, 2); }]), - (apply [{ return Helper.applySimplifyURemByPow2(*${root}); }]) + (apply [{ Helper.applySimplifyURemByPow2(*${root}); }]) >; // Transform d 
= [su]div(x, y) and r = [su]rem(x, y) - > d, r = [su]divrem(x, y) @@ -340,7 +340,7 @@ def simplify_add_to_sub: GICombineRule < (defs root:$root, simplify_add_to_sub_matchinfo:$info), (match (wip_match_opcode G_ADD):$root, [{ return Helper.matchSimplifyAddToSub(*${root}, ${info}); }]), - (apply [{ return Helper.applySimplifyAddToSub(*${root}, ${info});}]) + (apply [{ Helper.applySimplifyAddToSub(*${root}, ${info});}]) >; // Fold fp_op(cst) to the constant result of the floating point operation. @@ -349,7 +349,7 @@ def constant_fp_op: GICombineRule < (defs root:$root, constant_fp_op_matchinfo:$info), (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root, [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }]) + (apply [{ Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }]) >; // Fold int2ptr(ptr2int(x)) -> x @@ -357,7 +357,7 @@ def p2i_to_i2p: GICombineRule< (defs root:$root, register_matchinfo:$info), (match (wip_match_opcode G_INTTOPTR):$root, [{ return Helper.matchCombineI2PToP2I(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineI2PToP2I(*${root}, ${info}); }]) + (apply [{ Helper.applyCombineI2PToP2I(*${root}, ${info}); }]) >; // Fold ptr2int(int2ptr(x)) -> x @@ -365,7 +365,7 @@ def i2p_to_p2i: GICombineRule< (defs root:$root, register_matchinfo:$info), (match (wip_match_opcode G_PTRTOINT):$root, [{ return Helper.matchCombineP2IToI2P(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }]) + (apply [{ Helper.applyCombineP2IToI2P(*${root}, ${info}); }]) >; // Fold add ptrtoint(x), y -> ptrtoint (ptr_add x), y @@ -374,7 +374,7 @@ def add_p2i_to_ptradd : GICombineRule< (defs root:$root, add_p2i_to_ptradd_matchinfo:$info), (match (wip_match_opcode G_ADD):$root, [{ return Helper.matchCombineAddP2IToPtrAdd(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineAddP2IToPtrAdd(*${root}, ${info}); }]) + (apply [{ Helper.applyCombineAddP2IToPtrAdd(*${root}, ${info}); }]) >; // Fold (ptr_add (int2ptr C1), C2) -> C1 + C2 @@ -383,7 +383,7 @@ def const_ptradd_to_i2p: GICombineRule< (defs root:$root, const_ptradd_to_i2p_matchinfo:$info), (match (wip_match_opcode G_PTR_ADD):$root, [{ return Helper.matchCombineConstPtrAddToI2P(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineConstPtrAddToI2P(*${root}, ${info}); }]) + (apply [{ Helper.applyCombineConstPtrAddToI2P(*${root}, ${info}); }]) >; // Simplify: (logic_op (op x...), (op y...)) -> (op (logic_op x, y)) @@ -391,7 +391,7 @@ def hoist_logic_op_with_same_opcode_hands: GICombineRule < (defs root:$root, instruction_steps_matchdata:$info), (match (wip_match_opcode G_AND, G_OR, G_XOR):$root, [{ return Helper.matchHoistLogicOpWithSameOpcodeHands(*${root}, ${info}); }]), - (apply [{ return Helper.applyBuildInstructionSteps(*${root}, ${info});}]) + (apply [{ Helper.applyBuildInstructionSteps(*${root}, ${info});}]) >; // Fold ashr (shl x, C), C -> sext_inreg (C) @@ -400,7 +400,7 @@ def shl_ashr_to_sext_inreg : GICombineRule< (defs root:$root, shl_ashr_to_sext_inreg_matchinfo:$info), (match (wip_match_opcode G_ASHR): $root, [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]), - (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}]) + (apply [{ Helper.applyAshShlToSextInreg(*${root}, ${info});}]) >; // Fold and(and(x, C1), C2) -> C1&C2 ? 
and(x, C1&C2) : 0 @@ -408,7 +408,7 @@ def overlapping_and: GICombineRule < (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, [{ return Helper.matchOverlappingAnd(*${root}, ${info}); }]), - (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }]) + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) >; // Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y. @@ -462,7 +462,7 @@ def ext_ext_fold: GICombineRule < (defs root:$root, ext_ext_fold_matchinfo:$matchinfo), (match (wip_match_opcode G_ANYEXT, G_SEXT, G_ZEXT):$root, [{ return Helper.matchCombineExtOfExt(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }]) + (apply [{ Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }]) >; def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector">; @@ -470,7 +470,7 @@ def not_cmp_fold : GICombineRule< (defs root:$d, not_cmp_fold_matchinfo:$info), (match (wip_match_opcode G_XOR): $d, [{ return Helper.matchNotCmp(*${d}, ${info}); }]), - (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }]) + (apply [{ Helper.applyNotCmp(*${d}, ${info}); }]) >; // Fold (fneg (fneg x)) -> x. @@ -487,7 +487,7 @@ def unmerge_merge : GICombineRule< (defs root:$d, unmerge_merge_matchinfo:$info), (match (wip_match_opcode G_UNMERGE_VALUES): $d, [{ return Helper.matchCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]), - (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]) + (apply [{ Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]) >; // Fold (fabs (fabs x)) -> (fabs x). @@ -504,7 +504,7 @@ def unmerge_cst : GICombineRule< (defs root:$d, unmerge_cst_matchinfo:$info), (match (wip_match_opcode G_UNMERGE_VALUES): $d, [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]), - (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) + (apply [{ Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) >; // Transform x,y = unmerge z -> x = trunc z. @@ -512,7 +512,7 @@ def unmerge_dead_to_trunc : GICombineRule< (defs root:$d), (match (wip_match_opcode G_UNMERGE_VALUES): $d, [{ return Helper.matchCombineUnmergeWithDeadLanesToTrunc(*${d}); }]), - (apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) + (apply [{ Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) >; // Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0. @@ -520,7 +520,7 @@ def unmerge_zext_to_zext : GICombineRule< (defs root:$d), (match (wip_match_opcode G_UNMERGE_VALUES): $d, [{ return Helper.matchCombineUnmergeZExtToZExt(*${d}); }]), - (apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }]) + (apply [{ Helper.applyCombineUnmergeZExtToZExt(*${d}); }]) >; // Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x). @@ -529,7 +529,7 @@ def trunc_ext_fold: GICombineRule < (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo), (match (wip_match_opcode G_TRUNC):$root, [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }]) + (apply [{ Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }]) >; // Fold trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits(). 
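The TableGen changes above all drop the return keyword from the (apply [{ ... }]) fragments because the corresponding CombinerHelper apply* methods now return void. The division of labour this relies on can be summarised with a self-contained toy sketch (made-up types and names, not the LLVM MachineInstr or CombinerHelper API): a matcher may fail and therefore reports bool, while an applier is only invoked after a successful match and so has nothing meaningful to return.

    #include <cassert>

    // Toy stand-in for a machine instruction; not the real MachineInstr.
    struct Instr { int Opcode; int Imm; };
    enum : int { G_MUL = 1, G_SHL = 2 };

    // Matcher: may fail, so it returns bool and fills ShiftVal on success.
    bool matchMulToShl(const Instr &I, unsigned &ShiftVal) {
      if (I.Opcode != G_MUL || I.Imm <= 0 || (I.Imm & (I.Imm - 1)) != 0)
        return false;                      // not a multiply by a power of two
      ShiftVal = 0;
      for (int V = I.Imm; V > 1; V >>= 1)  // log2 of the immediate
        ++ShiftVal;
      return true;
    }

    // Applier: only called after the matcher succeeded, so it returns void.
    void applyMulToShl(Instr &I, unsigned ShiftVal) {
      I.Opcode = G_SHL;
      I.Imm = static_cast<int>(ShiftVal);
    }

    int main() {
      Instr I{G_MUL, 8};
      unsigned ShiftVal = 0;
      if (matchMulToShl(I, ShiftVal)) // the generated rule checks the matcher,
        applyMulToShl(I, ShiftVal);   // then calls the applier unconditionally.
      assert(I.Opcode == G_SHL && I.Imm == 3);
      return 0;
    }

Once the appliers return void, leaving a return in the apply fragments would not compile, which is presumably why the .td file is updated in lock-step with the header.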
@@ -538,7 +538,7 @@ def trunc_shl: GICombineRule < (defs root:$root, trunc_shl_matchinfo:$matchinfo), (match (wip_match_opcode G_TRUNC):$root, [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }]) + (apply [{ Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }]) >; // Transform (mul x, -1) -> (sub 0, x) @@ -546,7 +546,7 @@ def mul_by_neg_one: GICombineRule < (defs root:$root), (match (wip_match_opcode G_MUL):$root, [{ return Helper.matchConstantOp(${root}->getOperand(2), -1); }]), - (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }]) + (apply [{ Helper.applyCombineMulByNegativeOne(*${root}); }]) >; // Fold (xor (and x, y), y) -> (and (not x), y) @@ -556,7 +556,7 @@ def xor_of_and_with_same_reg: GICombineRule < (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo), (match (wip_match_opcode G_XOR):$root, [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }]) + (apply [{ Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }]) >; // Transform (ptr_add 0, x) -> (int_to_ptr x) @@ -564,27 +564,27 @@ def ptr_add_with_zero: GICombineRule< (defs root:$root), (match (wip_match_opcode G_PTR_ADD):$root, [{ return Helper.matchPtrAddZero(*${root}); }]), - (apply [{ return Helper.applyPtrAddZero(*${root}); }])>; + (apply [{ Helper.applyPtrAddZero(*${root}); }])>; def regs_small_vec : GIDefMatchData<"SmallVector">; def combine_insert_vec_elts_build_vector : GICombineRule< (defs root:$root, regs_small_vec:$info), (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root, [{ return Helper.matchCombineInsertVecElts(*${root}, ${info}); }]), - (apply [{ return Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>; + (apply [{ Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>; def load_or_combine : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_OR):$root, [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]), - (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>; + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">; def extend_through_phis : GICombineRule< (defs root:$root, extend_through_phis_matchdata:$matchinfo), (match (wip_match_opcode G_PHI):$root, [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]), - (apply [{ return Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>; + (apply [{ Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>; // Currently only the one combine above. 
def insert_vec_elt_combines : GICombineGroup< @@ -633,7 +633,7 @@ def bitfield_extract_from_and : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]), - (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>; + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>; @@ -641,10 +641,17 @@ def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_SEXT_INREG):$root, [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]), - (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>; + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg, bitfield_extract_from_and]>; +def reassoc_ptradd : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_PTR_ADD):$root, + [{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + +def reassocs : GICombineGroup<[reassoc_ptradd]>; // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, @@ -678,9 +685,10 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one]>; def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, - extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload, + extract_vec_elt_combines, combines_for_extload, combine_indexed_load_store, undef_combines, identity_combines, phi_combines, simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, + reassocs, ptr_add_immed_chain, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 678bf822fe08e..ecf264aa337ff 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -740,29 +740,27 @@ bool LLParser::parseNamedMetadata() { return true; NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name); - - if (Lex.getKind() == lltok::rbrace) { - Lex.Lex(); - return false; - } - - do { - MDNode *N = nullptr; - // Parse uniqued MDNodes inline as a special case. -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - if (Lex.getKind() == lltok::MetadataVar && Lex.getStrVal() == #CLASS) { \ - if (parse##CLASS(N, /*IsDistinct=*/false)) \ - return true; \ - NMD->addOperand(N); \ - continue; \ - } -#include "llvm/IR/Metadata.def" - // Parse all other MDNodes as an MDNodeID. - if (parseToken(lltok::exclaim, "Expected '!' here") || parseMDNodeID(N)) { - return true; - } - NMD->addOperand(N); - } while (EatIfPresent(lltok::comma)); + if (Lex.getKind() != lltok::rbrace) + do { + MDNode *N = nullptr; + // parse DIExpressions inline as a special case. They are still MDNodes, + // so they can still appear in named metadata. Remove this logic if they + // become plain Metadata. + if (Lex.getKind() == lltok::MetadataVar && + Lex.getStrVal() == "DIExpression") { + if (parseDIExpression(N, /*IsDistinct=*/false)) + return true; + // DIArgLists should only appear inline in a function, as they may + // contain LocalAsMetadata arguments which require a function context. 
+ } else if (Lex.getKind() == lltok::MetadataVar && + Lex.getStrVal() == "DIArgList") { + return tokError("found DIArgList outside of function"); + } else if (parseToken(lltok::exclaim, "Expected '!' here") || + parseMDNodeID(N)) { + return true; + } + NMD->addOperand(N); + } while (EatIfPresent(lltok::comma)); return parseToken(lltok::rbrace, "expected end of metadata node"); } @@ -782,10 +780,9 @@ bool LLParser::parseStandaloneMetadata() { if (Lex.getKind() == lltok::Type) return tokError("unexpected type in metadata definition"); - auto DistinctLoc = Lex.getLoc(); bool IsDistinct = EatIfPresent(lltok::kw_distinct); if (Lex.getKind() == lltok::MetadataVar) { - if (parseSpecializedMDNode(Init, IsDistinct, DistinctLoc)) + if (parseSpecializedMDNode(Init, IsDistinct)) return true; } else if (parseToken(lltok::exclaim, "Expected '!' here") || parseMDTuple(Init, IsDistinct)) @@ -4681,25 +4678,12 @@ bool LLParser::parseMDField(StringRef Name, FieldTy &Result) { return parseMDField(Loc, Name, Result); } -bool LLParser::parseSpecializedMDNode(MDNode *&N, bool IsDistinct, - LocTy DistinctLoc) { +bool LLParser::parseSpecializedMDNode(MDNode *&N, bool IsDistinct) { assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name"); -#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ +#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \ if (Lex.getStrVal() == #CLASS) \ return parse##CLASS(N, IsDistinct); -#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ - if (Lex.getStrVal() == #CLASS) { \ - if (IsDistinct) \ - return error(DistinctLoc, "'distinct' not allowed for !" #CLASS); \ - return parse##CLASS(N, IsDistinct); \ - } -#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ - if (Lex.getStrVal() == #CLASS) { \ - if (!IsDistinct) \ - return error(DistinctLoc, "missing 'distinct', required for !" #CLASS); \ - return parse##CLASS(N, IsDistinct); \ - } #include "llvm/IR/Metadata.def" return tokError("expected metadata type"); @@ -5043,6 +5027,9 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { /// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd, /// sysroot: "/", sdk: "MacOSX.sdk") bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 'distinct', required for !DICompileUnit"); + #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ REQUIRED(file, MDField, (/* AllowNull */ false)); \ @@ -5406,7 +5393,7 @@ bool LLParser::parseDIExpression(MDNode *&Result, bool IsDistinct) { } bool LLParser::parseDIArgList(MDNode *&Result, bool IsDistinct) { - return tokError("!DIArgList cannot appear outside of a function"); + return parseDIArgList(Result, IsDistinct, nullptr); } /// ParseDIArgList: /// ::= !DIArgList(i32 7, i64 %0) diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index a2ad4a4207971..8493eb7a28b23 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -555,7 +555,7 @@ class MetadataLoader::MetadataLoaderImpl { } /// Upgrade the expression from previous versions. - Error upgradeDIExpression(uint64_t FromVersion, bool &IsDistinct, + Error upgradeDIExpression(uint64_t FromVersion, MutableArrayRef &Expr, SmallVectorImpl &Buffer) { auto N = Expr.size(); @@ -629,9 +629,6 @@ class MetadataLoader::MetadataLoaderImpl { LLVM_FALLTHROUGH; } case 3: - IsDistinct = false; - LLVM_FALLTHROUGH; - case 4: // Up-to-date! 
break; } @@ -1984,12 +1981,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( auto Elts = MutableArrayRef(Record).slice(1); SmallVector Buffer; - if (Error Err = upgradeDIExpression(Version, IsDistinct, Elts, Buffer)) + if (Error Err = upgradeDIExpression(Version, Elts, Buffer)) return Err; - if (IsDistinct) - return error("Invalid record"); - MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo); NextMetadataNo++; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1ad55e264acad..bdb973e8e421b 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1758,6 +1758,7 @@ void ModuleBitcodeWriter::writeDIFile(const DIFile *N, void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev) { + assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); @@ -2008,7 +2009,7 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - const uint64_t Version = 4 << 1; + const uint64_t Version = 3 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); @@ -2153,20 +2154,6 @@ void ModuleBitcodeWriter::writeMetadataRecords( if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); -#ifndef NDEBUG - switch (N->getMetadataID()) { -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - case Metadata::CLASS##Kind: \ - assert(!N->isDistinct() && "Expected non-distinct " #CLASS); \ - break; -#define HANDLE_MDNODE_LEAF_DISTINCT(CLASS) \ - case Metadata::CLASS##Kind: \ - assert(N->isDistinct() && "Expected distinct " #CLASS); \ - break; -#include "llvm/IR/Metadata.def" - } -#endif - switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index bb02c101fd048..ad1b461db648e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -679,12 +679,11 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { return false; } -bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { +void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Builder.setInstrAndDebugLoc(MI); Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchSextInRegOfLoad( @@ -722,7 +721,7 @@ bool CombinerHelper::matchSextInRegOfLoad( return true; } -bool CombinerHelper::applySextInRegOfLoad( +void CombinerHelper::applySextInRegOfLoad( MachineInstr &MI, std::tuple &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register LoadReg; @@ -745,7 +744,6 @@ bool CombinerHelper::applySextInRegOfLoad( Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), LoadDef->getOperand(1).getReg(), *NewMMO); MI.eraseFromParent(); - return true; } bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, @@ -1691,7 +1689,7 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, return Cst.hasValue(); } -bool 
CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, +void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, Optional &Cst) { assert(Cst.hasValue() && "Optional is unexpectedly empty!"); Builder.setInstrAndDebugLoc(MI); @@ -1700,7 +1698,6 @@ bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, Register DstReg = MI.getOperand(0).getReg(); Builder.buildFConstant(DstReg, *FPVal); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, @@ -1720,6 +1717,13 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, if (!MaybeImmVal) return false; + // Don't do this combine if there multiple uses of the first PTR_ADD, + // since we may be able to compute the second PTR_ADD as an immediate + // offset anyway. Folding the first offset into the second may cause us + // to go beyond the bounds of our legal addressing modes. + if (!MRI.hasOneNonDBGUse(Add2)) + return false; + MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) return false; @@ -1736,7 +1740,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, +void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); MachineIRBuilder MIB(MI); @@ -1746,7 +1750,6 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, MI.getOperand(1).setReg(MatchInfo.Base); MI.getOperand(2).setReg(NewOffset.getReg(0)); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, @@ -1794,7 +1797,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, return true; } -bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, +void CombinerHelper::applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || @@ -1812,7 +1815,7 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { Builder.buildConstant(MI.getOperand(0), 0); MI.eraseFromParent(); - return true; + return; } // Arithmetic shift and saturating signed left shift have no effect beyond // scalar size. 
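The new early-out in matchPtrAddImmedChain above (bailing out when the first G_PTR_ADD has more than one non-debug use) exists because folding the two immediates together can replace an offset that the load or store could have absorbed into its addressing mode with a combined offset that it cannot. A self-contained numeric sketch of that situation, using a made-up signed 12-bit immediate range as a stand-in for a target's isLegalAddressingMode answer:

    #include <cassert>
    #include <cstdint>

    // Hypothetical target rule: addressing-mode immediates must fit in a
    // signed 12-bit field. A real target answers this via
    // TargetLowering::isLegalAddressingMode instead.
    bool isLegalOffset(int64_t Off) { return Off >= -2048 && Off <= 2047; }

    int main() {
      // %p1 = G_PTR_ADD %base, 2000   (and %p1 has other users)
      // %p2 = G_PTR_ADD %p1, 100      (100 could fold into a load of %p2)
      int64_t C1 = 2000, C2 = 100;

      // Each offset is individually a legal addressing-mode immediate...
      assert(isLegalOffset(C1) && isLegalOffset(C2));

      // ...but chaining them into a single G_PTR_ADD of %base would need an
      // immediate of 2100, which no longer fits, so the combine would trade
      // a foldable offset for an extra materialised constant.
      assert(!isLegalOffset(C1 + C2));
      return 0;
    }

When the first G_PTR_ADD has a single use it dies after the fold, so the combine still fires in that case.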
@@ -1825,7 +1828,6 @@ bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, MI.getOperand(1).setReg(MatchInfo.Reg); MI.getOperand(2).setReg(NewImm); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, @@ -1909,7 +1911,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, return true; } -bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, +void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || @@ -1941,7 +1943,6 @@ bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, MatchInfo.Logic->eraseFromParent(); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, @@ -1956,7 +1957,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, return (static_cast(ShiftVal) != -1); } -bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, +void CombinerHelper::applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); MachineIRBuilder MIB(MI); @@ -1966,7 +1967,6 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL)); MI.getOperand(2).setReg(ShiftCst.getReg(0)); Observer.changedInstr(MI); - return true; } // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source @@ -2007,7 +2007,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, return MinLeadingZeros >= ShiftAmt; } -bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, +void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) { Register ExtSrcReg = MatchData.Reg; int64_t ShiftAmtVal = MatchData.Imm; @@ -2019,7 +2019,6 @@ bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); Builder.buildZExt(MI.getOperand(0), NarrowShift); MI.eraseFromParent(); - return true; } static Register peekThroughBitcast(Register Reg, @@ -2057,7 +2056,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( return true; } -bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( +void CombinerHelper::applyCombineUnmergeMergeToPlainValues( MachineInstr &MI, SmallVectorImpl &Operands) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2078,7 +2077,6 @@ bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( Builder.buildCast(DstReg, SrcReg); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, @@ -2106,7 +2104,7 @@ bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, return true; } -bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, +void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl &Csts) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2120,7 +2118,6 @@ bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { @@ -2134,7 +2131,7 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { return true; } -bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { +void 
CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { Builder.setInstrAndDebugLoc(MI); Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); // Truncating a vector is going to truncate every single lane, @@ -2153,7 +2150,6 @@ bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { } else Builder.buildTrunc(Dst0Reg, SrcReg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { @@ -2182,7 +2178,7 @@ bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); } -bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { +void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2214,7 +2210,6 @@ bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, @@ -2242,7 +2237,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, return ShiftVal >= Size / 2 && ShiftVal < Size; } -bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, +void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2313,7 +2308,6 @@ bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, } MI.eraseFromParent(); - return true; } bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, @@ -2336,13 +2330,12 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); } -bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); Register DstReg = MI.getOperand(0).getReg(); Builder.setInstr(MI); Builder.buildCopy(DstReg, Reg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { @@ -2351,13 +2344,12 @@ bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); } -bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); Register DstReg = MI.getOperand(0).getReg(); Builder.setInstr(MI); Builder.buildZExtOrTrunc(DstReg, Reg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineAddP2IToPtrAdd( @@ -2385,7 +2377,7 @@ bool CombinerHelper::matchCombineAddP2IToPtrAdd( return false; } -bool CombinerHelper::applyCombineAddP2IToPtrAdd( +void CombinerHelper::applyCombineAddP2IToPtrAdd( MachineInstr &MI, std::pair &PtrReg) { Register Dst = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); @@ -2402,7 +2394,6 @@ bool CombinerHelper::applyCombineAddP2IToPtrAdd( auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); Builder.buildPtrToInt(Dst, PtrAdd); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, @@ -2423,7 +2414,7 @@ bool 
CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, return false; } -bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, +void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); Register Dst = MI.getOperand(0).getReg(); @@ -2431,7 +2422,6 @@ bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, Builder.setInstrAndDebugLoc(MI); Builder.buildConstant(Dst, NewCst); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { @@ -2478,7 +2468,7 @@ bool CombinerHelper::matchCombineExtOfExt( return false; } -bool CombinerHelper::applyCombineExtOfExt( +void CombinerHelper::applyCombineExtOfExt( MachineInstr &MI, std::tuple &MatchInfo) { assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || MI.getOpcode() == TargetOpcode::G_SEXT || @@ -2493,7 +2483,7 @@ bool CombinerHelper::applyCombineExtOfExt( Observer.changingInstr(MI); MI.getOperand(1).setReg(Reg); Observer.changedInstr(MI); - return true; + return; } // Combine: @@ -2506,13 +2496,10 @@ bool CombinerHelper::applyCombineExtOfExt( Builder.setInstrAndDebugLoc(MI); Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); MI.eraseFromParent(); - return true; } - - return false; } -bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { +void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2522,7 +2509,6 @@ bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, MI.getFlags()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { @@ -2552,7 +2538,7 @@ bool CombinerHelper::matchCombineTruncOfExt( return false; } -bool CombinerHelper::applyCombineTruncOfExt( +void CombinerHelper::applyCombineTruncOfExt( MachineInstr &MI, std::pair &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register SrcReg = MatchInfo.first; @@ -2563,7 +2549,7 @@ bool CombinerHelper::applyCombineTruncOfExt( if (SrcTy == DstTy) { MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); - return true; + return; } Builder.setInstrAndDebugLoc(MI); if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) @@ -2571,7 +2557,6 @@ bool CombinerHelper::applyCombineTruncOfExt( else Builder.buildTrunc(DstReg, SrcReg); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchCombineTruncOfShl( @@ -2598,7 +2583,7 @@ bool CombinerHelper::matchCombineTruncOfShl( return false; } -bool CombinerHelper::applyCombineTruncOfShl( +void CombinerHelper::applyCombineTruncOfShl( MachineInstr &MI, std::pair &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register DstReg = MI.getOperand(0).getReg(); @@ -2612,7 +2597,6 @@ bool CombinerHelper::applyCombineTruncOfShl( auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { @@ -2880,7 +2864,7 @@ bool CombinerHelper::matchCombineInsertVecElts( return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; } -bool CombinerHelper::applyCombineInsertVecElts( +void 
CombinerHelper::applyCombineInsertVecElts( MachineInstr &MI, SmallVectorImpl &MatchInfo) { Builder.setInstr(MI); Register UndefReg; @@ -2897,17 +2881,15 @@ bool CombinerHelper::applyCombineInsertVecElts( } Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); MI.eraseFromParent(); - return true; } -bool CombinerHelper::applySimplifyAddToSub( +void CombinerHelper::applySimplifyAddToSub( MachineInstr &MI, std::tuple &MatchInfo) { Builder.setInstr(MI); Register SubLHS, SubRHS; std::tie(SubLHS, SubRHS) = MatchInfo; Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( @@ -3001,7 +2983,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( return true; } -bool CombinerHelper::applyBuildInstructionSteps( +void CombinerHelper::applyBuildInstructionSteps( MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { assert(MatchInfo.InstrsToBuild.size() && "Expected at least one instr to build?"); @@ -3014,7 +2996,6 @@ bool CombinerHelper::applyBuildInstructionSteps( OperandFn(Instr); } MI.eraseFromParent(); - return true; } bool CombinerHelper::matchAshrShlToSextInreg( @@ -3034,7 +3015,8 @@ bool CombinerHelper::matchAshrShlToSextInreg( MatchInfo = std::make_tuple(Src, ShlCst); return true; } -bool CombinerHelper::applyAshShlToSextInreg( + +void CombinerHelper::applyAshShlToSextInreg( MachineInstr &MI, std::tuple &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_ASHR); Register Src; @@ -3044,7 +3026,6 @@ bool CombinerHelper::applyAshShlToSextInreg( Builder.setInstrAndDebugLoc(MI); Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); MI.eraseFromParent(); - return true; } /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 @@ -3267,7 +3248,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, return true; } -bool CombinerHelper::applyNotCmp(MachineInstr &MI, +void CombinerHelper::applyNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate) { for (Register Reg : RegsToNegate) { MachineInstr *Def = MRI.getVRegDef(Reg); @@ -3297,7 +3278,6 @@ bool CombinerHelper::applyNotCmp(MachineInstr &MI, replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); - return true; } bool CombinerHelper::matchXorOfAndWithSameReg( @@ -3331,7 +3311,7 @@ bool CombinerHelper::matchXorOfAndWithSameReg( return Y == SharedReg; } -bool CombinerHelper::applyXorOfAndWithSameReg( +void CombinerHelper::applyXorOfAndWithSameReg( MachineInstr &MI, std::pair &MatchInfo) { // Fold (xor (and x, y), y) -> (and (not x), y) Builder.setInstrAndDebugLoc(MI); @@ -3343,7 +3323,6 @@ bool CombinerHelper::applyXorOfAndWithSameReg( MI.getOperand(1).setReg(Not->getOperand(0).getReg()); MI.getOperand(2).setReg(Y); Observer.changedInstr(MI); - return true; } bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { @@ -3364,16 +3343,15 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { return isBuildVectorAllZeros(*VecMI, MRI); } -bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { +void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); Builder.setInstrAndDebugLoc(MI); Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); MI.eraseFromParent(); - return true; } /// The second source operand is known to be a power of 2. 
-bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { +void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { Register DstReg = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Pow2Src1 = MI.getOperand(2).getReg(); @@ -3385,7 +3363,6 @@ bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); Builder.buildAnd(DstReg, Src0, Add); MI.eraseFromParent(); - return true; } Optional> @@ -3790,7 +3767,7 @@ bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, return true; } -bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, +void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) { assert(MI.getOpcode() == TargetOpcode::G_PHI); Register DstReg = ExtMI->getOperand(0).getReg(); @@ -3834,7 +3811,6 @@ bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, } Builder.insertInstr(NewPhi); ExtMI->eraseFromParent(); - return true; } bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, @@ -3947,12 +3923,17 @@ void CombinerHelper::applyExtractAllEltsFromBuildVector( MI.eraseFromParent(); } -bool CombinerHelper::applyBuildFn( +void CombinerHelper::applyBuildFn( MachineInstr &MI, std::function &MatchInfo) { Builder.setInstrAndDebugLoc(MI); MatchInfo(Builder); MI.eraseFromParent(); - return true; +} + +void CombinerHelper::applyBuildFnNoErase( + MachineInstr &MI, std::function &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + MatchInfo(Builder); } /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. @@ -4129,6 +4110,129 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( return true; } +bool CombinerHelper::reassociationCanBreakAddressingModePattern( + MachineInstr &PtrAdd) { + assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD); + + Register Src1Reg = PtrAdd.getOperand(1).getReg(); + MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI); + if (!Src1Def) + return false; + + Register Src2Reg = PtrAdd.getOperand(2).getReg(); + + if (MRI.hasOneNonDBGUse(Src1Reg)) + return false; + + auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); + if (!C1) + return false; + auto C2 = getConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + const APInt &C1APIntVal = *C1; + const APInt &C2APIntVal = *C2; + const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue(); + + for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) { + // This combine may end up running before ptrtoint/inttoptr combines + // manage to eliminate redundant conversions, so try to look through them. + MachineInstr *ConvUseMI = &UseMI; + unsigned ConvUseOpc = ConvUseMI->getOpcode(); + while (ConvUseOpc == TargetOpcode::G_INTTOPTR || + ConvUseOpc == TargetOpcode::G_PTRTOINT) { + Register DefReg = ConvUseMI->getOperand(0).getReg(); + if (!MRI.hasOneNonDBGUse(DefReg)) + break; + ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg); + } + auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD || + ConvUseOpc == TargetOpcode::G_STORE; + if (!LoadStore) + continue; + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). 
+ TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + unsigned AS = + MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace(); + Type *AccessTy = + getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()), + PtrAdd.getMF()->getFunction().getContext()); + const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering(); + if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, + AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, + AccessTy, AS)) + return true; + } + + return false; +} + +bool CombinerHelper::matchReassocPtrAdd( + MachineInstr &MI, std::function &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); + // We're trying to match a few pointer computation patterns here for + // re-association opportunities. + // 1) Isolating a constant operand to be on the RHS, e.g.: + // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) + // + // 2) Folding two constants in each sub-tree as long as such folding + // doesn't break a legal addressing mode. + // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + MachineInstr *LHS = MRI.getVRegDef(Src1Reg); + MachineInstr *RHS = MRI.getVRegDef(Src2Reg); + + if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) { + // Try to match example 1). + if (RHS->getOpcode() != TargetOpcode::G_ADD) + return false; + auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); + if (!C2) + return false; + + MatchInfo = [=,&MI](MachineIRBuilder &B) { + LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + + auto NewBase = + Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(NewBase.getReg(0)); + MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); + Observer.changedInstr(MI); + }; + } else { + // Try to match example 2. 
+ Register LHSSrc1 = LHS->getOperand(1).getReg(); + Register LHSSrc2 = LHS->getOperand(2).getReg(); + auto C1 = getConstantVRegVal(LHSSrc2, MRI); + if (!C1) + return false; + auto C2 = getConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(LHSSrc1); + MI.getOperand(2).setReg(NewCst.getReg(0)); + Observer.changedInstr(MI); + }; + } + return !reassociationCanBreakAddressingModePattern(MI); +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index fb3c34a9bc493..34e1f9225d420 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1182,7 +1182,6 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) { if (parseMDNode(Node)) return true; } else if (Token.is(MIToken::md_diexpr)) { - // FIXME: This should be driven off of the UNIQUED property in Metadata.def if (parseDIExpression(Node)) return true; } else if (Token.is(MIToken::md_dilocation)) { @@ -2326,7 +2325,6 @@ bool MIParser::parseMetadataOperand(MachineOperand &Dest) { if (parseMDNode(Node)) return true; } else if (Token.is(MIToken::md_diexpr)) { - // FIXME: This should be driven off of the UNIQUED property in Metadata.def if (parseDIExpression(Node)) return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ea3de9d0db66..acf466f18dcdc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9736,14 +9736,14 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (refineUniformBase(BasePtr, Index, DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops, + DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); } if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops, + DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); } @@ -9792,7 +9792,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (refineUniformBase(BasePtr, Index, DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - PassThru.getValueType(), DL, Ops, + MGT->getMemoryVT(), DL, Ops, MGT->getMemOperand(), MGT->getIndexType(), MGT->getExtensionType()); } @@ -9800,7 +9800,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - PassThru.getValueType(), DL, Ops, + MGT->getMemoryVT(), DL, Ops, MGT->getMemOperand(), MGT->getIndexType(), MGT->getExtensionType()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c7f22dd3627f8..9b00a849a1d71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
@@ -7644,7 +7644,7 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, AM, ST->isTruncatingStore(), ST->isCompressingStore()); } -SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, +SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, @@ -7653,9 +7653,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy)); + dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7663,9 +7663,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); + IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType, ExtTy); + VTs, MemVT, MMO, IndexType, ExtTy); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7691,7 +7691,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, return V; } -SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, @@ -7700,9 +7700,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc)); + dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7710,9 +7710,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); + IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType, IsTrunc); + VTs, MemVT, MMO, IndexType, IsTrunc); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorElementCount() == diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index d62f8f40b571c..66c93b6e6af20 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1235,11 +1235,10 @@ void SlotTracker::CreateFunctionSlot(const Value *V) { void SlotTracker::CreateMetadataSlot(const MDNode *N) { assert(N && "Can't insert a null Value into SlotTracker!"); - // Don't make slots for uniqued nodes. We just print them inline everywhere. -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - if (isa(N)) \ + // Don't make slots for DIExpressions or DIArgLists. We just print them inline + // everywhere. 
+ if (isa(N) || isa(N)) return; -#include "llvm/IR/Metadata.def" unsigned DestSlot = mdnNext; if (!mdnMap.insert(std::make_pair(N, DestSlot)).second) @@ -2357,7 +2356,9 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N, static void writeDIArgList(raw_ostream &Out, const DIArgList *N, TypePrinting *TypePrinter, SlotTracker *Machine, - const Module *Context) { + const Module *Context, bool FromValue = false) { + assert(FromValue && + "Unexpected DIArgList metadata outside of value argument"); Out << "!DIArgList("; FieldSeparator FS; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); @@ -2515,16 +2516,16 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context, bool FromValue) { - assert((FromValue || !(isa(MD) || isa(MD))) && - "Unexpected function-local metadata outside of value argument"); - - // Write uniqued MDNodes inline when used as a value. -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - if (const CLASS *N = dyn_cast(MD)) { \ - write##CLASS(Out, N, TypePrinter, Machine, Context); \ - return; \ + // Write DIExpressions and DIArgLists inline when used as a value. Improves + // readability of debug info intrinsics. + if (const DIExpression *Expr = dyn_cast(MD)) { + writeDIExpression(Out, Expr, TypePrinter, Machine, Context); + return; + } + if (const DIArgList *ArgList = dyn_cast(MD)) { + writeDIArgList(Out, ArgList, TypePrinter, Machine, Context, FromValue); + return; } -#include "llvm/IR/Metadata.def" if (const MDNode *N = dyn_cast(MD)) { std::unique_ptr MachineStorage; @@ -2555,6 +2556,9 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, auto *V = cast(MD); assert(TypePrinter && "TypePrinter required for metadata values"); + assert((FromValue || !isa(V)) && + "Unexpected function-local metadata outside of value argument"); + TypePrinter->print(V->getValue()->getType(), Out); Out << ' '; WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context); @@ -3443,17 +3447,15 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { if (i) Out << ", "; - // Write UNIQUED nodes inline. + // Write DIExpressions inline. // FIXME: Ban DIExpressions in NamedMDNodes, they will serve no purpose. MDNode *Op = NMD->getOperand(i); assert(!isa(Op) && "DIArgLists should not appear in NamedMDNodes"); -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - if (auto *N = dyn_cast(Op)) { \ - write##CLASS(Out, N, nullptr, nullptr, nullptr); \ - continue; \ - } -#include "llvm/IR/Metadata.def" + if (auto *Expr = dyn_cast(Op)) { + writeDIExpression(Out, Expr, nullptr, nullptr, nullptr); + continue; + } int Slot = Machine.getMetadataSlot(Op); if (Slot == -1) @@ -4710,18 +4712,12 @@ static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD, TypePrinting TypePrinter(M); - WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M); + WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M, + /* FromValue */ true); auto *N = dyn_cast(&MD); - if (OnlyAsOperand || !N) { - return; - } - // Uniqued MDNodes are always treated as if OnlyAsOperand, as they are - // printed inline. 
-#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ - if (isa(MD)) \ + if (OnlyAsOperand || !N || isa(MD) || isa(MD)) return; -#include "llvm/IR/Metadata.def" OS << " = "; WriteMDNodeBodyInternal(OS, N, &TypePrinter, MST.getMachine(), M); diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index eff01bffe760f..7b0dab799e1a9 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1053,7 +1053,6 @@ DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, DIExpression *DIExpression::getImpl(LLVMContext &Context, ArrayRef Elements, StorageType Storage, bool ShouldCreate) { - assert(Storage != Distinct && "DIExpression cannot be distinct"); DEFINE_GETIMPL_LOOKUP(DIExpression, (Elements)); DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements)); } @@ -1584,7 +1583,6 @@ DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType, DIArgList *DIArgList::getImpl(LLVMContext &Context, ArrayRef Args, StorageType Storage, bool ShouldCreate) { - assert(Storage != Distinct && "DIArgList cannot be distinct"); DEFINE_GETIMPL_LOOKUP(DIArgList, (Args)); DEFINE_GETIMPL_STORE_NO_OPS(DIArgList, (Args)); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 2533ce83600b5..99819602c5452 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -60,7 +60,6 @@ LLVMContextImpl::~LLVMContextImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (auto *I : CLASS##s) \ I->dropAllReferences(); -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Also drop references that come from the Value bridges. @@ -75,7 +74,6 @@ LLVMContextImpl::~LLVMContextImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (CLASS * I : CLASS##s) \ delete I; -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Free the constants. diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 5caa8f60b694f..2ae23fdc95a8a 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1381,7 +1381,6 @@ class LLVMContextImpl { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ DenseSet CLASS##s; -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Optional map for looking up composite types by identifier. 
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 9ac388835be2e..4f87ef5377653 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -672,7 +672,6 @@ MDNode *MDNode::replaceWithPermanentImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: \ break; -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } @@ -813,7 +812,6 @@ MDNode *MDNode::uniquify() { dispatchRecalculateHash(SubclassThis, ShouldRecalculateHash); \ return uniquifyImpl(SubclassThis, getContext().pImpl->CLASS##s); \ } -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } } @@ -826,7 +824,6 @@ void MDNode::eraseFromStore() { case CLASS##Kind: \ getContext().pImpl->CLASS##s.erase(cast(this)); \ break; -#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7802144fb2c98..b921a6c4b884a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4224,6 +4224,37 @@ defm : SIMDVectorLShiftLongBySizeBHSPats; defm : SIMDVectorLShiftLongBySizeBHSPats; defm : SIMDVectorLShiftLongBySizeBHSPats; +// Constant vector values, used in the S/UQXTN patterns below. +def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; +def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; +def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; +def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; +def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; +def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; + +// trunc(umin(X, 255)) -> UQXTN v8i8 +def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))), + (UQXTNv8i8 V128:$Vn)>; +// trunc(umin(X, 65535)) -> UQXTN v4i16 +def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))), + (UQXTNv4i16 V128:$Vn)>; +// trunc(smin(smax(X, -128), 127)) -> SQXTN +// with reversed min/max +def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), + (v8i16 VImm7F)))), + (SQXTNv8i8 V128:$Vn)>; +def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), + (v8i16 VImm80)))), + (SQXTNv8i8 V128:$Vn)>; +// trunc(smin(smax(X, -32768), 32767)) -> SQXTN +// with reversed min/max +def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), + (v4i32 VImm7FFF)))), + (SQXTNv4i16 V128:$Vn)>; +def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), + (v4i32 VImm8000)))), + (SQXTNv4i16 V128:$Vn)>; + //===----------------------------------------------------------------------===// // Advanced SIMD three vector instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 6fbff72d1ac91..e1d1c52aa53ae 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -303,9 +303,14 @@ def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12), R31, R0, R1, FP, BP)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list.
- let AltOrders = [(add (sub GPRC, R2), R2)]; + // On AIX, CSRs are allocated starting from R31 according to: + // https://www.ibm.com/docs/en/ssw_aix_72/assembler/assembler_pdf.pdf. + // This also helps set the correct `NumOfGPRsSaved' in the traceback table. + let AltOrders = [(add (sub GPRC, R2), R2), + (add (sequence "R%u", 2, 12), + (sequence "R%u", 31, 13), R0, R1, FP, BP)]; let AltOrderSelect = [{ - return MF.getSubtarget().is64BitELFABI(); + return MF.getSubtarget().getGPRAllocationOrderIdx(); }]; } @@ -314,9 +319,11 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), X31, X13, X0, X1, FP8, BP8)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub G8RC, X2), X2)]; + let AltOrders = [(add (sub G8RC, X2), X2), + (add (sequence "X%u", 2, 12), + (sequence "X%u", 31, 13), X0, X1, FP8, BP8)]; let AltOrderSelect = [{ - return MF.getSubtarget().is64BitELFABI(); + return MF.getSubtarget().getGPRAllocationOrderIdx(); }]; } @@ -326,18 +333,22 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub GPRC_NOR0, R2), R2)]; + let AltOrders = [(add (sub GPRC_NOR0, R2), R2), + (add (sequence "R%u", 2, 12), + (sequence "R%u", 31, 13), R1, FP, BP, ZERO)]; let AltOrderSelect = [{ - return MF.getSubtarget().is64BitELFABI(); + return MF.getSubtarget().getGPRAllocationOrderIdx(); }]; } def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub G8RC_NOX0, X2), X2)]; + let AltOrders = [(add (sub G8RC_NOX0, X2), X2), + (add (sequence "X%u", 2, 12), + (sequence "X%u", 31, 13), X1, FP8, BP8, ZERO8)]; let AltOrderSelect = [{ - return MF.getSubtarget().is64BitELFABI(); + return MF.getSubtarget().getGPRAllocationOrderIdx(); }]; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index abc7ea1d14128..56b7b8ab75494 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -411,6 +411,16 @@ class PPCSubtarget : public PPCGenSubtargetInfo { return PredictableSelectIsExpensive; } + // Select the allocation order of GPRC and G8RC. It must be kept strictly + // consistent with the corresponding AltOrders in PPCRegisterInfo.td. + unsigned getGPRAllocationOrderIdx() const { + if (is64BitELFABI()) + return 1; + if (isAIXABI()) + return 2; + return 0; + } + // GlobalISEL const CallLowering *getCallLowering() const override; const RegisterBankInfo *getRegBankInfo() const override; diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 6c000b8bb2450..6c6351c70e3a6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1317,8 +1317,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { return Result; // If the incoming values are pointer casts of the same original value, - // replace the phi with a single cast. - if (PN.getType()->isPointerTy()) { + // replace the phi with a single cast iff we can insert a non-PHI instruction.
+ if (PN.getType()->isPointerTy() && + PN.getParent()->getFirstInsertionPt() != PN.getParent()->end()) { Value *IV0 = PN.getIncomingValue(0); Value *IV0Stripped = IV0->stripPointerCasts(); // Set to keep track of values known to be equal to IV0Stripped after diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 94c97bb1264ff..2946c0018c31f 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -276,7 +276,9 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is // converted to a declaration, to disable direct access. Don't do this if GV // is implicitly dso_local due to a non-default visibility. - if (ClearDSOLocalOnDeclarations && GV.isDeclarationForLinker() && + if (ClearDSOLocalOnDeclarations && + (GV.isDeclarationForLinker() || + (isPerformingImport() && !doImportAsDefinition(&GV))) && !GV.isImplicitDSOLocal()) { GV.setDSOLocal(false); } else if (VI && VI.isDSOLocal(ImportIndex.withDSOLocalPropagation())) { diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6288a62326554..b8f6bf930fc93 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4673,17 +4673,13 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (BBI->mayHaveSideEffects()) { if (auto *SI = dyn_cast(BBI)) { + // Temporarily disable removal of volatile stores preceding unreachable, + // pending a potential LangRef change permitting volatile stores to + // trap. + // TODO: Either remove this code, or properly integrate the check into + // isGuaranteedToTransferExecutionToSuccessor(). 
if (SI->isVolatile()) break; - } else if (auto *LI = dyn_cast(BBI)) { - if (LI->isVolatile()) - break; - } else if (auto *RMWI = dyn_cast(BBI)) { - if (RMWI->isVolatile()) - break; - } else if (auto *CXI = dyn_cast(BBI)) { - if (CXI->isVolatile()) - break; } else if (isa(BBI)) { // A catchpad may invoke exception object constructors and such, which // in some languages can be arbitrary code, so be conservative by @@ -4692,8 +4688,9 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != EHPersonality::CoreCLR) break; - } else if (!isa(BBI) && !isa(BBI) && - !isa(BBI)) { + } else if (!isa(BBI) && !isa(BBI) && + !isa(BBI) && !isa(BBI) && + !isa(BBI) && !isa(BBI)) { break; } // Note that deleting LandingPad's here is in fact okay, although it @@ -4703,8 +4700,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { } // Delete this instruction (any uses are guaranteed to be dead) - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BBI->eraseFromParent(); Changed = true; } diff --git a/llvm/test/Assembler/invalid-diarglist-outside-function.ll b/llvm/test/Assembler/invalid-diarglist-outside-function.ll deleted file mode 100644 index 351cd0bc7b40f..0000000000000 --- a/llvm/test/Assembler/invalid-diarglist-outside-function.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s - -; CHECK: :[[@LINE+1]]:6: error: !DIArgList cannot appear outside of a function -!0 = !DIArgList() diff --git a/llvm/test/Assembler/invalid-diexpression-distinct.ll b/llvm/test/Assembler/invalid-diexpression-distinct.ll deleted file mode 100644 index 96628e37479b2..0000000000000 --- a/llvm/test/Assembler/invalid-diexpression-distinct.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s - -; CHECK: :[[@LINE+1]]:6: error: 'distinct' not allowed for !DIExpression -!0 = distinct !DIExpression() diff --git a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll b/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll deleted file mode 100644 index d888e9a9eb827..0000000000000 --- a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llvm-dis -o - %s.bc | FileCheck %s - -!llvm.dbg.cu = !{!1} -!llvm.module.flags = !{!8, !9} - -!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true) -!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) -!2 = !DIFile(filename: "a.c", directory: "/") -!3 = !{} -!4 = !{!7} -!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; CHECK: expr: !DIExpression() -!6 = distinct !DIExpression() -!7 = !DIGlobalVariableExpression(var: !0, expr: !6) -!8 = !{i32 2, !"Dwarf Version", i32 4} -!9 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc b/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc deleted file mode 100644 index 088e1a4b44885..0000000000000 Binary files a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc and /dev/null differ diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir new file mode 100644 index 0000000000000..1c5ed0eb39d21 --- 
/dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +--- +name: test1_noreassoc_legal_already_new_is_illegal +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test1_noreassoc_legal_already_new_is_illegal + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4777 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: G_STORE [[C2]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 4777 + %4:_(s64) = G_CONSTANT i64 6 + %9:_(s32) = G_CONSTANT i32 0 + %10:_(p0) = G_PTR_ADD %0, %2(s64) + %11:_(p0) = G_PTR_ADD %10, %4(s64) + %7:_(s32) = G_LOAD %11(p0) :: (load 4) + G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test2_reassoc_already_legal_new_also_legal +alignment: 4 +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test2_reassoc_already_legal_new_also_legal + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 10 + %4:_(s64) = G_CONSTANT i64 6 + %9:_(s32) = G_CONSTANT i32 0 + %10:_(p0) = G_PTR_ADD %0, %2(s64) + %11:_(p0) = G_PTR_ADD %10, %4(s64) + %7:_(s32) = G_LOAD %11(p0) :: (load 4) + G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test3_noreassoc_only_oneuse +alignment: 4 +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test3_noreassoc_only_oneuse + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4783 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %10:_(s64) = G_CONSTANT i64 4783 + %9:_(p0) = G_PTR_ADD %0, %10(s64) + %7:_(s32) = G_LOAD %9(p0) :: (load 4) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test4_reassoc_existing_is_already_illegal +alignment: 4 +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test4_reassoc_existing_is_already_illegal + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4096 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 17 + %4:_(s64) = G_CONSTANT i64 4079 + %9:_(s32) = G_CONSTANT i32 0 + %10:_(p0) = G_PTR_ADD %0, %2(s64) + %11:_(p0) = G_PTR_ADD %10, %4(s64) + %7:_(s32) = G_LOAD %11(p0) :: (load 4) + G_STORE %9(s32), %10(p0) :: (store 4) ; other use of %10 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test5_add_on_rhs +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test5_add_on_rhs + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(s64) = G_ADD %1, %2 + %4:_(p0) = G_PTR_ADD %0, %3(s64) + %7:_(s32) = G_LOAD %4(p0) :: (load 1) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/pr33172.ll b/llvm/test/CodeGen/AArch64/pr33172.ll index 098d5358b02d0..e1b4cdc6603c9 100644 --- a/llvm/test/CodeGen/AArch64/pr33172.ll +++ b/llvm/test/CodeGen/AArch64/pr33172.ll @@ -22,7 +22,7 @@ entry: store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8 store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8 tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2 - unreachable + ret void } ; Function Attrs: argmemonly nounwind diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll index 515f4d5bd114c..400cb0912ffb8 100644 --- a/llvm/test/CodeGen/AArch64/qmovn.ll +++ b/llvm/test/CodeGen/AArch64/qmovn.ll @@ -4,11 +4,7 @@ define <4 x i16> @vqmovni32_smaxmin(<4 x i32> %s0) { ; CHECK-LABEL: vqmovni32_smaxmin: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.4s, #127, msl #8 -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mvni v1.4s, #127, msl #8 -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: sqxtn v0.4h, v0.4s ; CHECK-NEXT: ret entry: %c1 = icmp slt <4 x i32> %s0, @@ -22,11 +18,7 @@ entry: define <4 x i16> @vqmovni32_sminmax(<4 x i32> %s0) { ; CHECK-LABEL: vqmovni32_sminmax: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mvni v1.4s, #127, msl #8 -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #127, msl #8 -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: sqxtn v0.4h, v0.4s ; CHECK-NEXT: ret entry: %c1 = icmp sgt <4 x i32> %s0, @@ -40,9 +32,7 @@ entry: define <4 x i16> @vqmovni32_umaxmin(<4 x i32> %s0) { ; CHECK-LABEL: vqmovni32_umaxmin: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: uqxtn v0.4h, v0.4s ; CHECK-NEXT: ret entry: %c1 = icmp ult <4 x i32> %s0, @@ -54,11 +44,7 @@ entry: define <8 x i8> @vqmovni16_smaxmin(<8 x i16> %s0) { ; CHECK-LABEL: vqmovni16_smaxmin: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8h, #127 -; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h -; CHECK-NEXT: mvni v1.8h, #127 -; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: sqxtn v0.8b, v0.8h ; CHECK-NEXT: ret entry: %c1 = icmp slt <8 x i16> %s0, @@ -72,11 +58,7 @@ entry: define <8 x i8> @vqmovni16_sminmax(<8 x i16> %s0) { ; CHECK-LABEL: vqmovni16_sminmax: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mvni v1.8h, #127 -; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h -; CHECK-NEXT: movi v1.8h, #127 -; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: sqxtn v0.8b, v0.8h ; CHECK-NEXT: ret entry: %c1 = icmp sgt <8 x i16> %s0, @@ -90,9 +72,7 @@ entry: define <8 x i8> @vqmovni16_umaxmin(<8 x i16> %s0) { ; CHECK-LABEL: vqmovni16_umaxmin: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0xff00ff00ff00ff -; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: uqxtn v0.8b, v0.8h ; CHECK-NEXT: ret entry: %c1 = icmp ult <8 x i16> %s0, diff --git a/llvm/test/CodeGen/ARM/Windows/memset.ll b/llvm/test/CodeGen/ARM/Windows/memset.ll index 8cb257c156606..d4d918a29c14b 100644 --- a/llvm/test/CodeGen/ARM/Windows/memset.ll +++ b/llvm/test/CodeGen/ARM/Windows/memset.ll @@ -7,7 +7,7 @@ declare void 
@llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @function() { entry: call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false) - unreachable + ret void } ; CHECK: movs r1, #0 diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index 49dbb03135f5a..ab5f58c27e768 100644 --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ;rdar://8003725 +declare void @llvm.trap() + @G1 = external global i32 @G2 = external global i32 @@ -38,6 +40,7 @@ for.body.lr.ph: ; preds = %entry %1 = icmp sgt i32 %0, 1 %smax = select i1 %1, i32 %0, i32 1 call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false) + call void @llvm.trap() unreachable for.cond1.preheader: ; preds = %entry diff --git a/llvm/test/CodeGen/ARM/memfunc.ll b/llvm/test/CodeGen/ARM/memfunc.ll index 0fe1f630c57a8..217b88a32de06 100644 --- a/llvm/test/CodeGen/ARM/memfunc.ll +++ b/llvm/test/CodeGen/ARM/memfunc.ll @@ -94,7 +94,7 @@ entry: ; CHECK-GNUEABI: bl memset call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 0, i32 500, i1 false) - unreachable + ret void } ; Check that alloca arguments to memory intrinsics are automatically aligned if at least 8 bytes in size @@ -140,7 +140,7 @@ entry: %2 = bitcast [9 x i8]* %arr2 to i8* call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned if less than 8 bytes in size @@ -179,7 +179,7 @@ entry: %2 = bitcast [7 x i8]* %arr2 to i8* call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned if size+offset is less than 8 bytes @@ -218,7 +218,7 @@ entry: %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned if the offset is not a multiple of 4 @@ -257,7 +257,7 @@ entry: %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned if the offset is unknown @@ -296,7 +296,7 @@ entry: %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned if the GEP is not inbounds @@ -335,7 +335,7 @@ entry: %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that alloca arguments are not aligned when the offset is past the end of the allocation @@ -374,7 +374,7 @@ entry: %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16 call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) - unreachable + ret void } ; Check that global variables are aligned if they are large enough, but only if @@ -401,7 +401,7 @@ entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i1 false) call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr10, i32 0, i32 0), i32 %n, i1 false) - unreachable + ret void } ; CHECK: {{\.data|\.section.+data}} diff --git a/llvm/test/CodeGen/Hexagon/branchfolder-keep-impdef.ll b/llvm/test/CodeGen/Hexagon/branchfolder-keep-impdef.ll index 777952724ffb9..db56e0a2fafe5 100644 --- a/llvm/test/CodeGen/Hexagon/branchfolder-keep-impdef.ll +++ b/llvm/test/CodeGen/Hexagon/branchfolder-keep-impdef.ll @@ -19,7 +19,7 @@ b2: ; preds = %b1, %b0 %t1 = phi i8* [ %t0, %b1 ], [ undef, %b0 ] %t2 = getelementptr inbounds i8, i8* %t1, i32 %p0 tail call void @llvm.memmove.p0i8.p0i8.i32(i8* undef, i8* %t2, i32 undef, i1 false) #1 - unreachable + ret void } declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll index d53799bc4d191..b712d1556cea1 100644 --- a/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll +++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll @@ -155,10 +155,10 @@ b2: ; preds = %b1 %v120 = getelementptr <16 x i32>, <16 x i32>* %v2, i32 6 %v121 = tail call <16 x i32> @llvm.hexagon.V6.vshufoh(<16 x i32> undef, <16 x i32> undef) store <16 x i32> %v121, <16 x i32>* %v120, align 64, !tbaa !0 - unreachable + ret void b3: ; preds = %b1 - unreachable + ret void b4: ; preds = %b0 ret void diff --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll index 0faa1ff4f3cef..015017a5833d2 100644 --- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -1,24 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s ;; Formerly crashed, see PR 1508 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc64-unknown-linux-gnu" %struct.Range = type { i64, i64 } -; CHECK: .cfi_startproc -; CHECK: .cfi_personality 148, DW.ref.__gxx_personality_v0 -; CHECK: .cfi_lsda 20, .Lexception0 -; CHECK: .cfi_def_cfa_offset 176 -; CHECK: .cfi_offset r31, -8 -; CHECK: .cfi_offset lr, 16 -; CHECK: .cfi_def_cfa_register r31 -; CHECK: .cfi_offset r27, -40 -; CHECK: .cfi_offset r28, -32 -; CHECK: .cfi_offset r29, -24 -; CHECK: .cfi_offset r30, -16 -; CHECK: .cfi_endproc - - define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: Bork: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 31, -8(1) +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -176(1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset r31, -8 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: mr 31, 1 +; CHECK-NEXT: .cfi_def_cfa_register r31 +; CHECK-NEXT: .cfi_offset r27, -40 +; CHECK-NEXT: .cfi_offset r28, -32 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 29, 152(31) # 8-byte Folded Spill +; CHECK-NEXT: mr 29, 3 +; CHECK-NEXT: rldic 3, 5, 3, 29 +; CHECK-NEXT: std 27, 136(31) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 160(31) # 8-byte Folded Spill +; CHECK-NEXT: mr 30, 4 +; CHECK-NEXT: addi 3, 3, 15 +; CHECK-NEXT: rldicl 3, 3, 60, 4 +; CHECK-NEXT: mr 27, 1 +; CHECK-NEXT: rldicl 3, 3, 4, 28 +; CHECK-NEXT: addi 4, 31, 176 +; CHECK-NEXT: neg 3, 3 +; CHECK-NEXT: std 
28, 144(31) # 8-byte Folded Spill +; CHECK-NEXT: stdux 4, 1, 3 +; CHECK-NEXT: addi 3, 1, 112 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: bl Foo +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: # %bb30.preheader +; CHECK-NEXT: addi 28, 31, 120 +; CHECK-NEXT: cmpldi 30, 0 +; CHECK-NEXT: beq 0, .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %cond_true +; CHECK-NEXT: # +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: mr 3, 29 +; CHECK-NEXT: mr 4, 28 +; CHECK-NEXT: bl Bar +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: # %bb.3: # %invcont23 +; CHECK-NEXT: # +; CHECK-NEXT: ld 3, 128(31) +; CHECK-NEXT: sub 30, 30, 3 +; CHECK-NEXT: cmpldi 30, 0 +; CHECK-NEXT: bne 0, .LBB0_2 +; CHECK-NEXT: .LBB0_4: # %cleanup +; CHECK-NEXT: ld 30, 160(31) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 152(31) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 144(31) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 136(31) # 8-byte Folded Reload +; CHECK-NEXT: ld 1, 0(1) +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: ld 31, -8(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_5: # %unwind.loopexit.split-lp +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_6: # %unwind.loopexit +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: .LBB0_7: # %unwind +; CHECK-NEXT: ld 3, 0(1) +; CHECK-NEXT: mr 1, 27 +; CHECK-NEXT: std 3, 0(1) entry: %effectiveRange = alloca %struct.Range, align 8 ; <%struct.Range*> [#uses=2] %tmp4 = call i8* @llvm.stacksave() ; [#uses=1] diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index eb095a75daf31..e483b1823707c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -2337,7 +2337,7 @@ define void @caller_mix() { ; ASM64PWR4: mflr 0 ; ASM64PWR4-DAG: std 0, 16(1) -; ASM64PWR4-DAG: stdu 1, -256(1) +; ASM64PWR4-DAG: stdu 1, -240(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 112(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 120(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 128(1) diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll index ac6c25de82d88..a83b55c500490 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -203,7 +203,7 @@ entry: ; CHECKASM-LABEL: .call_test_byval_mem3: -; ASM32BIT: stwu 1, -112(1) +; ASM32BIT: stwu 1, -96(1) ; ASM32BIT-DAG: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) ; ASM32BIT-DAG: addi 3, 1, 56 ; ASM32BIT-DAG: addi 4, [[REG]], 24 @@ -216,7 +216,7 @@ entry: ; ASM32BIT-DAG: lwz 9, 16([[REG]]) ; ASM32BIT-DAG: lwz 10, 20([[REG]]) ; ASM32BIT: bl .test_byval_mem3 -; ASM32BIT: addi 1, 1, 112 +; ASM32BIT: addi 1, 1, 96 ; The memcpy call was inlined in 64-bit so MIR test is redundant and omitted. ; ASM64BIT: stdu 1, -128(1) @@ -319,7 +319,7 @@ entry: ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 316, 0, implicit-def dead $r1, implicit $r1 -; ASM32BIT: stwu 1, -336(1) +; ASM32BIT: stwu 1, -320(1) ; ASM32BIT-NEXT: stw [[REG1:[0-9]+]], {{[0-9]+}}(1) ; ASM32BIT: lwz [[REG1]], L..C{{[0-9]+}}(2) ; ASM32BIT-DAG: lhz [[REG2:[0-9]+]], 28([[REG1]]) @@ -338,7 +338,7 @@ entry: ; ASM32BIT-DAG: lwz 9, 20([[REG1]]) ; ASM32BIT-DAG: lwz 10, 24([[REG1]]) ; ASM32BIT: bl .test_byval_mem4 -; ASM32BIT: addi 1, 1, 336 +; ASM32BIT: addi 1, 1, 320 ; Confirm the expected memcpy call is independent of the call to test_byval_mem4. 
; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 @@ -363,7 +363,7 @@ entry: ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 344, 0, implicit-def dead $r1, implicit $r1 -; ASM64BIT: stdu 1, -368(1) +; ASM64BIT: stdu 1, -352(1) ; ASM64BIT-DAG: ld [[REG1:[0-9]+]], L..C{{[0-9]+}}(2) ; ASM64BIT-DAG: addi 3, 1, 112 ; ASM64BIT-DAG: addi 4, [[REG1]], 24 @@ -383,7 +383,7 @@ entry: ; ASM64BIT-DAG: ld 9, 8([[REG1]]) ; ASM64BIT-DAG: ld 10, 16([[REG1]]) ; ASM64BIT: bl .test_byval_mem4 -; ASM64BIT: addi 1, 1, 368 +; ASM64BIT: addi 1, 1, 352 define void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1 %s) { entry: diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll new file mode 100644 index 0000000000000..c41b742f3e153 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=powerpc64-aix-xcoff -debug-only=regalloc < %s 2>&1 | \ +; RUN: FileCheck --check-prefix=AIX-64 %s +; RUN: llc -mtriple=powerpc-aix-xcoff -debug-only=regalloc < %s 2>&1 | \ +; RUN: FileCheck --check-prefix=AIX-32 %s + +define i32 @g(i32 %a, i32 %b) { +; AIX-64: AllocationOrder(G8RC_and_G8RC_NOX0) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ] +; AIX-64: AllocationOrder(G8RC) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x0 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ] +; AIX-32: AllocationOrder(GPRC) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r0 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ] +; AIX-32: AllocationOrder(GPRC_and_GPRC_NOR0) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ] + %c = add i32 %a, %b + %d = shl i32 %a, 4 + %cmp = icmp slt i32 %c, %d + %e = select i1 %cmp, i32 %a, i32 %b + ret i32 %e +} + +define float @f(float %a, float %b) { +; AIX-32: AllocationOrder(F4RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ] + %c = fadd float %a, %b + ret float %c +} + +define double @d(double %a, double %b) { +; AIX-64: AllocationOrder(VFRC) = [ $vf2 $vf3 $vf4 $vf5 $vf0 $vf1 $vf6 $vf7 $vf8 $vf9 $vf10 $vf11 $vf12 $vf13 $vf14 $vf15 $vf16 $vf17 $vf18 $vf19 $vf31 $vf30 $vf29 $vf28 $vf27 $vf26 $vf25 $vf24 $vf23 $vf22 $vf21 $vf20 ] +; AIX-64: AllocationOrder(F8RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ] + %c = fadd double %a, %b + ret double %c +} diff --git a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll index 616a94f9a16b1..702a5d2c4f206 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll @@ -14,10 +14,10 @@ declare hidden fastcc i32 @spam(%1*, %2*, %3*) unnamed_addr #0 ; Function Attrs: nounwind define void @baz(%3* %0) local_unnamed_addr #2 { -; AIX-64: std 30 -; AIX-64: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, 
NumOfGPRsSaved = 2 -; AIX-32: stw 30 -; AIX-32: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2 +; AIX-64: std 31 +; AIX-64: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1 +; AIX-32: stw 31 +; AIX-32: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1 %2 = call signext i32 @wibble(%1* nonnull undef) #2 %3 = call fastcc zeroext i32 @spam(%1* nonnull undef, %2* nonnull undef, %3* nonnull %0) unreachable diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index 2742c493e986d..0b06d7ed586bf 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ppc32-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC32 ; RUN: llc < %s -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE -; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE +; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE,AIX-PPC64 ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64LE ; These two forms are equivalent: @@ -162,102 +162,102 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32-NEXT: addi 1, 1, 64 ; PPC32-NEXT: blr ; -; PPC64BE-LABEL: vector_i128_i8: -; PPC64BE: # %bb.0: -; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 22, 199(1) -; PPC64BE-NEXT: lbz 23, 191(1) -; PPC64BE-NEXT: add 6, 21, 6 -; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 22, 5 -; PPC64BE-NEXT: lbz 22, 223(1) -; PPC64BE-NEXT: add 4, 23, 4 -; PPC64BE-NEXT: lbz 23, 215(1) -; PPC64BE-NEXT: add 9, 21, 9 -; PPC64BE-NEXT: lbz 25, 127(1) -; PPC64BE-NEXT: add 8, 22, 8 -; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: add 7, 23, 7 -; PPC64BE-NEXT: lbz 24, 119(1) -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: lbz 22, 247(1) -; PPC64BE-NEXT: add 25, 21, 25 -; PPC64BE-NEXT: lbz 23, 239(1) -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: lbz 28, 151(1) -; PPC64BE-NEXT: add 24, 22, 24 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: add 10, 23, 10 -; PPC64BE-NEXT: lbz 27, 143(1) -; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: add 28, 21, 28 -; PPC64BE-NEXT: lbz 26, 135(1) -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: lbz 23, 263(1) -; PPC64BE-NEXT: add 27, 22, 27 -; PPC64BE-NEXT: lbz 11, 183(1) -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: add 26, 23, 26 -; PPC64BE-NEXT: lbz 12, 175(1) -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: lbz 0, 303(1) -; PPC64BE-NEXT: add 11, 21, 11 -; PPC64BE-NEXT: lbz 30, 167(1) -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: add 12, 0, 12 -; PPC64BE-NEXT: lbz 29, 159(1) -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: lbz 23, 287(1) -; 
PPC64BE-NEXT: add 30, 22, 30 -; PPC64BE-NEXT: stb 11, 15(3) -; PPC64BE-NEXT: addi 11, 12, 1 -; PPC64BE-NEXT: add 29, 23, 29 -; PPC64BE-NEXT: stb 11, 14(3) -; PPC64BE-NEXT: addi 11, 30, 1 -; PPC64BE-NEXT: stb 11, 13(3) -; PPC64BE-NEXT: addi 11, 29, 1 -; PPC64BE-NEXT: stb 11, 12(3) -; PPC64BE-NEXT: addi 11, 28, 1 -; PPC64BE-NEXT: stb 11, 11(3) -; PPC64BE-NEXT: addi 11, 27, 1 -; PPC64BE-NEXT: stb 11, 10(3) -; PPC64BE-NEXT: addi 11, 26, 1 -; PPC64BE-NEXT: stb 11, 9(3) -; PPC64BE-NEXT: addi 11, 25, 1 -; PPC64BE-NEXT: stb 11, 8(3) -; PPC64BE-NEXT: addi 11, 24, 1 -; PPC64BE-NEXT: stb 11, 7(3) -; PPC64BE-NEXT: stb 10, 6(3) -; PPC64BE-NEXT: stb 9, 5(3) -; PPC64BE-NEXT: stb 8, 4(3) -; PPC64BE-NEXT: stb 7, 3(3) -; PPC64BE-NEXT: stb 6, 2(3) -; PPC64BE-NEXT: stb 5, 1(3) -; PPC64BE-NEXT: stb 4, 0(3) -; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; PPC64BE-NEXT: blr +; AIX-PPC64-LABEL: vector_i128_i8: +; AIX-PPC64: # %bb.0: +; AIX-PPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lbz 22, 207(1) +; AIX-PPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lbz 23, 199(1) +; AIX-PPC64-NEXT: lbz 24, 191(1) +; AIX-PPC64-NEXT: add 6, 22, 6 +; AIX-PPC64-NEXT: lbz 22, 231(1) +; AIX-PPC64-NEXT: add 5, 23, 5 +; AIX-PPC64-NEXT: lbz 23, 223(1) +; AIX-PPC64-NEXT: add 4, 24, 4 +; AIX-PPC64-NEXT: lbz 24, 215(1) +; AIX-PPC64-NEXT: add 9, 22, 9 +; AIX-PPC64-NEXT: lbz 26, 127(1) +; AIX-PPC64-NEXT: add 8, 23, 8 +; AIX-PPC64-NEXT: lbz 22, 255(1) +; AIX-PPC64-NEXT: add 7, 24, 7 +; AIX-PPC64-NEXT: lbz 25, 119(1) +; AIX-PPC64-NEXT: addi 9, 9, 1 +; AIX-PPC64-NEXT: lbz 23, 247(1) +; AIX-PPC64-NEXT: add 26, 22, 26 +; AIX-PPC64-NEXT: lbz 24, 239(1) +; AIX-PPC64-NEXT: addi 8, 8, 1 +; AIX-PPC64-NEXT: lbz 29, 151(1) +; AIX-PPC64-NEXT: add 25, 23, 25 +; AIX-PPC64-NEXT: lbz 22, 279(1) +; AIX-PPC64-NEXT: add 10, 24, 10 +; AIX-PPC64-NEXT: lbz 28, 143(1) +; AIX-PPC64-NEXT: addi 10, 10, 1 +; AIX-PPC64-NEXT: lbz 23, 271(1) +; AIX-PPC64-NEXT: add 29, 22, 29 +; AIX-PPC64-NEXT: lbz 27, 135(1) +; AIX-PPC64-NEXT: addi 7, 7, 1 +; AIX-PPC64-NEXT: lbz 24, 263(1) +; AIX-PPC64-NEXT: add 28, 23, 28 +; AIX-PPC64-NEXT: lbz 11, 183(1) +; AIX-PPC64-NEXT: addi 6, 6, 1 +; AIX-PPC64-NEXT: lbz 22, 311(1) +; AIX-PPC64-NEXT: add 27, 24, 27 +; AIX-PPC64-NEXT: lbz 12, 175(1) +; AIX-PPC64-NEXT: addi 5, 5, 1 +; AIX-PPC64-NEXT: lbz 0, 303(1) +; AIX-PPC64-NEXT: add 11, 22, 11 +; AIX-PPC64-NEXT: lbz 31, 167(1) +; AIX-PPC64-NEXT: addi 11, 11, 1 +; AIX-PPC64-NEXT: lbz 23, 295(1) +; AIX-PPC64-NEXT: add 12, 0, 12 +; AIX-PPC64-NEXT: lbz 30, 159(1) +; AIX-PPC64-NEXT: addi 4, 4, 1 +; AIX-PPC64-NEXT: lbz 24, 287(1) +; 
AIX-PPC64-NEXT: add 31, 23, 31 +; AIX-PPC64-NEXT: stb 11, 15(3) +; AIX-PPC64-NEXT: addi 11, 12, 1 +; AIX-PPC64-NEXT: add 30, 24, 30 +; AIX-PPC64-NEXT: stb 11, 14(3) +; AIX-PPC64-NEXT: addi 11, 31, 1 +; AIX-PPC64-NEXT: stb 11, 13(3) +; AIX-PPC64-NEXT: addi 11, 30, 1 +; AIX-PPC64-NEXT: stb 11, 12(3) +; AIX-PPC64-NEXT: addi 11, 29, 1 +; AIX-PPC64-NEXT: stb 11, 11(3) +; AIX-PPC64-NEXT: addi 11, 28, 1 +; AIX-PPC64-NEXT: stb 11, 10(3) +; AIX-PPC64-NEXT: addi 11, 27, 1 +; AIX-PPC64-NEXT: stb 11, 9(3) +; AIX-PPC64-NEXT: addi 11, 26, 1 +; AIX-PPC64-NEXT: stb 11, 8(3) +; AIX-PPC64-NEXT: addi 11, 25, 1 +; AIX-PPC64-NEXT: stb 11, 7(3) +; AIX-PPC64-NEXT: stb 10, 6(3) +; AIX-PPC64-NEXT: stb 9, 5(3) +; AIX-PPC64-NEXT: stb 8, 4(3) +; AIX-PPC64-NEXT: stb 7, 3(3) +; AIX-PPC64-NEXT: stb 6, 2(3) +; AIX-PPC64-NEXT: stb 5, 1(3) +; AIX-PPC64-NEXT: stb 4, 0(3) +; AIX-PPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i8: ; PPC64LE: # %bb.0: @@ -310,54 +310,54 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: addi 1, 1, 32 ; PPC32-NEXT: blr ; -; PPC64BE-LABEL: vector_i128_i16: -; PPC64BE: # %bb.0: -; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 118(1) -; PPC64BE-NEXT: lhz 12, 182(1) -; PPC64BE-NEXT: lhz 0, 174(1) -; PPC64BE-NEXT: lhz 30, 166(1) -; PPC64BE-NEXT: add 11, 12, 11 -; PPC64BE-NEXT: lhz 29, 158(1) -; PPC64BE-NEXT: add 10, 0, 10 -; PPC64BE-NEXT: lhz 28, 142(1) -; PPC64BE-NEXT: add 9, 30, 9 -; PPC64BE-NEXT: lhz 27, 126(1) -; PPC64BE-NEXT: add 8, 29, 8 -; PPC64BE-NEXT: lhz 26, 134(1) -; PPC64BE-NEXT: add 6, 28, 6 -; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 27, 4 -; PPC64BE-NEXT: add 5, 26, 5 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: sth 11, 14(3) -; PPC64BE-NEXT: sth 10, 12(3) -; PPC64BE-NEXT: sth 9, 10(3) -; PPC64BE-NEXT: sth 8, 8(3) -; PPC64BE-NEXT: sth 7, 6(3) -; PPC64BE-NEXT: sth 6, 4(3) -; PPC64BE-NEXT: sth 5, 2(3) -; PPC64BE-NEXT: sth 4, 0(3) -; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; PPC64BE-NEXT: blr +; AIX-PPC64-LABEL: vector_i128_i16: +; AIX-PPC64: # %bb.0: +; AIX-PPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; 
AIX-PPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lhz 11, 118(1) +; AIX-PPC64-NEXT: lhz 12, 182(1) +; AIX-PPC64-NEXT: lhz 0, 174(1) +; AIX-PPC64-NEXT: lhz 31, 166(1) +; AIX-PPC64-NEXT: add 11, 12, 11 +; AIX-PPC64-NEXT: lhz 30, 158(1) +; AIX-PPC64-NEXT: add 10, 0, 10 +; AIX-PPC64-NEXT: lhz 29, 142(1) +; AIX-PPC64-NEXT: add 9, 31, 9 +; AIX-PPC64-NEXT: lhz 28, 126(1) +; AIX-PPC64-NEXT: add 8, 30, 8 +; AIX-PPC64-NEXT: lhz 27, 134(1) +; AIX-PPC64-NEXT: add 6, 29, 6 +; AIX-PPC64-NEXT: lhz 26, 150(1) +; AIX-PPC64-NEXT: add 4, 28, 4 +; AIX-PPC64-NEXT: add 5, 27, 5 +; AIX-PPC64-NEXT: addi 11, 11, 1 +; AIX-PPC64-NEXT: add 7, 26, 7 +; AIX-PPC64-NEXT: addi 10, 10, 1 +; AIX-PPC64-NEXT: addi 9, 9, 1 +; AIX-PPC64-NEXT: addi 8, 8, 1 +; AIX-PPC64-NEXT: addi 7, 7, 1 +; AIX-PPC64-NEXT: addi 6, 6, 1 +; AIX-PPC64-NEXT: addi 5, 5, 1 +; AIX-PPC64-NEXT: addi 4, 4, 1 +; AIX-PPC64-NEXT: sth 11, 14(3) +; AIX-PPC64-NEXT: sth 10, 12(3) +; AIX-PPC64-NEXT: sth 9, 10(3) +; AIX-PPC64-NEXT: sth 8, 8(3) +; AIX-PPC64-NEXT: sth 7, 6(3) +; AIX-PPC64-NEXT: sth 6, 4(3) +; AIX-PPC64-NEXT: sth 5, 2(3) +; AIX-PPC64-NEXT: sth 4, 0(3) +; AIX-PPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i16: ; PPC64LE: # %bb.0: diff --git a/llvm/test/ThinLTO/X86/import-dsolocal.ll b/llvm/test/ThinLTO/X86/import-dsolocal.ll new file mode 100644 index 0000000000000..11d41e70e19b6 --- /dev/null +++ b/llvm/test/ThinLTO/X86/import-dsolocal.ll @@ -0,0 +1,124 @@ +; RUN: split-file %s %t +; RUN: opt -module-summary %t/a.ll -o %t/a.bc +; RUN: opt -module-summary %t/b.ll -o %t/b.bc + +;; With a small limit, *_aux are either imported declarations (external/linkonce_odr/weak_odr) +;; or unimported (linkonce/weak). Check we discard dso_local. +; RUN: llvm-lto2 run %t/a.bc %t/b.bc -o %t1 -save-temps -import-instr-limit=3 \ +; RUN: -r=%t/a.bc,main,plx -r=%t/a.bc,extern, -r=%t/a.bc,linkonce, -r=%t/a.bc,linkonceodr, -r=%t/a.bc,weak, -r=%t/a.bc,weakodr, \ +; RUN: -r=%t/b.bc,a,pl -r=%t/b.bc,b,pl -r=%t/b.bc,extern,pl -r=%t/b.bc,extern_aux,pl \ +; RUN: -r=%t/b.bc,linkonce,pl -r=%t/b.bc,linkonce_aux,pl -r=%t/b.bc,linkonceodr,pl -r=%t/b.bc,linkonceodr_aux,pl \ +; RUN: -r=%t/b.bc,weak,pl -r=%t/b.bc,weak_aux,pl -r=%t/b.bc,weakodr,pl -r=%t/b.bc,weakodr_aux,pl +; RUN: llvm-dis %t1.1.3.import.bc -o - | FileCheck %s --check-prefixes=DEST,DEST1 + +;; With a large limit, *_aux are either imported definitions (external/linkonce_odr/weak_odr) +;; or unimported (linkonce/weak). Check we discard dso_local as well. 
+; RUN: llvm-lto2 run %t/a.bc %t/b.bc -o %t2 -save-temps -import-instr-limit=10 \ +; RUN: -r=%t/a.bc,main,plx -r=%t/a.bc,extern, -r=%t/a.bc,linkonce, -r=%t/a.bc,linkonceodr, -r=%t/a.bc,weak, -r=%t/a.bc,weakodr, \ +; RUN: -r=%t/b.bc,a,pl -r=%t/b.bc,b,pl -r=%t/b.bc,extern,pl -r=%t/b.bc,extern_aux,pl \ +; RUN: -r=%t/b.bc,linkonce,pl -r=%t/b.bc,linkonce_aux,pl -r=%t/b.bc,linkonceodr,pl -r=%t/b.bc,linkonceodr_aux,pl \ +; RUN: -r=%t/b.bc,weak,pl -r=%t/b.bc,weak_aux,pl -r=%t/b.bc,weakodr,pl -r=%t/b.bc,weakodr_aux,pl +; RUN: llvm-dis %t2.1.3.import.bc -o - | FileCheck %s --check-prefixes=DEST,DEST2 + +; DEST: @a = available_externally global i32 42, align 4 +; DEST-NEXT: @b = external global i32*, align 8 +; DEST: declare void @linkonce() +; DEST: declare void @weak() +; DEST: define dso_local i32 @main() +; DEST: define available_externally void @extern() + +; DEST1: declare i32 @extern_aux(i32*, i32**) +; DEST1: declare i32 @linkonceodr_aux(i32*, i32**) +; DEST2: define available_externally i32 @extern_aux(i32* %a, i32** %b) +; DEST2: define available_externally i32 @linkonceodr_aux(i32* %a, i32** %b) + +; DEST: define available_externally void @weakodr() + +; DEST1: declare i32 @weakodr_aux(i32*, i32**) +; DEST2: define available_externally i32 @weakodr_aux(i32* %a, i32** %b) + +;--- a.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @extern() +declare void @linkonce() +declare void @linkonceodr() +declare void @weak() +declare void @weakodr() + +define i32 @main() { + call void @extern() + call void @linkonce() + call void @linkonceodr() + call void @weak() + call void @weakodr() + ret i32 0 +} + +;--- b.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = dso_local global i32 42, align 4 +@b = dso_local global i32* @a, align 8 + +define dso_local void @extern() { + call i32 @extern_aux(i32* @a, i32** @b) + ret void +} + +define dso_local i32 @extern_aux(i32* %a, i32** %b) { + %p = load i32*, i32** %b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +define linkonce dso_local void @linkonce() { + call i32 @linkonce_aux(i32* @a, i32** @b) + ret void +} + +define linkonce i32 @linkonce_aux(i32* %a, i32** %b) { + %p = load i32*, i32** %b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +define linkonce_odr dso_local void @linkonceodr() { + call i32 @linkonceodr_aux(i32* @a, i32** @b) + ret void +} + +define linkonce_odr i32 @linkonceodr_aux(i32* %a, i32** %b) { + %p = load i32*, i32** %b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +define weak dso_local void @weak() { + call i32 @weak_aux(i32* @a, i32** @b) + ret void +} + +define weak i32 @weak_aux(i32* %a, i32** %b) { + %p = load i32*, i32** %b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +define weak_odr dso_local void @weakodr() { + call i32 @weakodr_aux(i32* @a, i32** @b) + ret void +} + +define weak_odr i32 @weakodr_aux(i32* %a, i32** %b) { + %p = load i32*, i32** %b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* %a, align 4 + ret i32 %v +} diff --git a/llvm/test/ThinLTO/X86/index-const-prop-linkage.ll b/llvm/test/ThinLTO/X86/index-const-prop-linkage.ll index 9eb85da926983..80f3f11e2c543 100644 --- 
a/llvm/test/ThinLTO/X86/index-const-prop-linkage.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop-linkage.ll @@ -10,7 +10,7 @@ ; - available_externally linkage ; - reference from @llvm.used ; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g2] -; CHECK-NEXT: @g1 = external dso_local global i32, align 4 +; CHECK-NEXT: @g1 = external global i32, align 4 ; CHECK-NEXT: @g2 = available_externally global i32 42, align 4 ; CHECK-NEXT: @g3 = available_externally global i32 42, align 4 diff --git a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll new file mode 100644 index 0000000000000..28695704097b2 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1" +target triple = "wasm32-unknown-unknown" + +%struct.quux = type { i32 } +%struct.blam = type <{ %struct.quux }> + +declare void @foo() +declare void @bar(%struct.quux*) +declare i32 @__gxx_wasm_personality_v0(...) + +define void @test() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +bb: + %tmp0 = alloca %struct.blam, align 4 + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + %tmp1 = getelementptr inbounds %struct.blam, %struct.blam* %tmp0, i32 0, i32 0 + invoke void @foo() + to label %bb3 unwind label %bb4 + +bb2: ; preds = %bb + %tmp2 = getelementptr inbounds %struct.blam, %struct.blam* %tmp0, i32 0, i32 0 + invoke void @foo() + to label %bb3 unwind label %bb4 + +bb3: ; preds = %bb2, %bb1 + unreachable + +bb4: ; preds = %bb2, %bb1 + ; This PHI should not be combined into a non-PHI instruction, because + ; catchswitch BB cannot have any non-PHI instruction other than catchswitch + ; itself. 
+ ; CHECK: bb4: + ; CHECK-NEXT: phi + ; CHECK-NEXT: catchswitch + %tmp3 = phi %struct.quux* [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] + %tmp4 = catchswitch within none [label %bb5] unwind label %bb7 + +bb5: ; preds = %bb4 + %tmp5 = catchpad within %tmp4 [i8* null] + invoke void @foo() [ "funclet"(token %tmp5) ] + to label %bb6 unwind label %bb7 + +bb6: ; preds = %bb5 + unreachable + +bb7: ; preds = %bb5, %bb4 + %tmp6 = cleanuppad within none [] + call void @bar(%struct.quux* %tmp3) [ "funclet"(token %tmp6) ] + unreachable +} diff --git a/llvm/test/Transforms/SimplifyCFG/empty-catchpad.ll b/llvm/test/Transforms/SimplifyCFG/empty-catchpad.ll index 6e9141568df67..d6e87f8fa486b 100644 --- a/llvm/test/Transforms/SimplifyCFG/empty-catchpad.ll +++ b/llvm/test/Transforms/SimplifyCFG/empty-catchpad.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s declare void @f() @@ -5,9 +6,22 @@ declare void @llvm.foo(i32) nounwind declare void @ProcessCLRException() define void @test1() personality void ()* @ProcessCLRException { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[EXN_DISPATCH:%.*]] +; CHECK: exn.dispatch: +; CHECK-NEXT: [[CS:%.*]] = catchswitch within none [label %pad1] unwind to caller +; CHECK: pad1: +; CHECK-NEXT: [[CP1:%.*]] = catchpad within [[CS]] [i32 1] +; CHECK-NEXT: call void @llvm.foo(i32 1) +; CHECK-NEXT: catchret from [[CP1]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %exn.dispatch + to label %exit unwind label %exn.dispatch exn.dispatch: %cs = catchswitch within none [label %pad1, label %pad2] unwind to caller pad1: @@ -21,43 +35,51 @@ exit: ret void } ; Remove unreachble catch2, leave catch1 as-is -; CHECK-LABEL: define void @test1() -; CHECK: %cs = catchswitch within none [label %pad1] unwind to caller -; CHECK-NOT: catchpad -; CHECK: %cp1 = catchpad within %cs [i32 1] -; CHECK-NOT: catchpad ; Remove both catchpads and the catchswitch from exn.dispatch -; CHECK-LABEL: define void @test2() define void @test2() personality void ()* @ProcessCLRException { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[VIA_CATCHSWITCH:%.*]] unwind label [[CLEANUP_INNER:%.*]] +; CHECK: cleanup.inner: +; CHECK-NEXT: [[CP_INNER:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @llvm.foo(i32 0) +; CHECK-NEXT: cleanupret from [[CP_INNER]] unwind to caller +; CHECK: via.catchswitch: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[DISPATCH_INNER:%.*]] +; CHECK: dispatch.inner: +; CHECK-NEXT: [[CS_INNER:%.*]] = catchswitch within none [label %pad.inner] unwind to caller +; CHECK: pad.inner: +; CHECK-NEXT: [[CATCH_INNER:%.*]] = catchpad within [[CS_INNER]] [i32 0] +; CHECK-NEXT: call void @llvm.foo(i32 1) +; CHECK-NEXT: catchret from [[CATCH_INNER]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %via.cleanup unwind label %exn.dispatch - ; CHECK-NOT: invoke - ; CHECK: call void @f() + to label %via.cleanup unwind label %exn.dispatch via.cleanup: invoke void @f() - to label %via.catchswitch unwind label %cleanup.inner + to label %via.catchswitch unwind label %cleanup.inner cleanup.inner: %cp.inner = cleanuppad within none [] call void @llvm.foo(i32 0) 
cleanupret from %cp.inner unwind label %exn.dispatch - ; CHECK: cleanupret from %cp.inner unwind to caller via.catchswitch: invoke void @f() - to label %exit unwind label %dispatch.inner + to label %exit unwind label %dispatch.inner dispatch.inner: %cs.inner = catchswitch within none [label %pad.inner] unwind label %exn.dispatch - ; CHECK: %cs.inner = catchswitch within none [label %pad.inner] unwind to caller pad.inner: %catch.inner = catchpad within %cs.inner [i32 0] - ; CHECK: %catch.inner = catchpad within %cs.inner call void @llvm.foo(i32 1) catchret from %catch.inner to label %exit exn.dispatch: %cs = catchswitch within none [label %pad1, label %pad2] unwind to caller - ; CHECK-NOT: catchswitch within - ; CHECK-NOT: catchpad pad1: catchpad within %cs [i32 1] unreachable @@ -70,36 +92,55 @@ exit: ; Same as @test2, but exn.dispatch catchswitch has an unwind dest that ; preds need to be reidrected to -; CHECK-LABEL: define void @test3() define void @test3() personality void ()* @ProcessCLRException { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[VIA_CLEANUP:%.*]] unwind label [[CLEANUP:%.*]] +; CHECK: via.cleanup: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[VIA_CATCHSWITCH:%.*]] unwind label [[CLEANUP_INNER:%.*]] +; CHECK: cleanup.inner: +; CHECK-NEXT: [[CP_INNER:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @llvm.foo(i32 0) +; CHECK-NEXT: cleanupret from [[CP_INNER]] unwind label [[CLEANUP]] +; CHECK: via.catchswitch: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[DISPATCH_INNER:%.*]] +; CHECK: dispatch.inner: +; CHECK-NEXT: [[CS_INNER:%.*]] = catchswitch within none [label %pad.inner] unwind label [[CLEANUP]] +; CHECK: pad.inner: +; CHECK-NEXT: [[CATCH_INNER:%.*]] = catchpad within [[CS_INNER]] [i32 0] +; CHECK-NEXT: call void @llvm.foo(i32 1) +; CHECK-NEXT: catchret from [[CATCH_INNER]] to label [[EXIT]] +; CHECK: cleanup: +; CHECK-NEXT: [[CP:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @llvm.foo(i32 0) +; CHECK-NEXT: cleanupret from [[CP]] unwind to caller +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %via.cleanup unwind label %exn.dispatch - ; CHECK: invoke void @f() - ; CHECK-NEXT: to label %via.cleanup unwind label %cleanup + to label %via.cleanup unwind label %exn.dispatch via.cleanup: invoke void @f() - to label %via.catchswitch unwind label %cleanup.inner + to label %via.catchswitch unwind label %cleanup.inner cleanup.inner: %cp.inner = cleanuppad within none [] call void @llvm.foo(i32 0) cleanupret from %cp.inner unwind label %exn.dispatch - ; CHECK: cleanupret from %cp.inner unwind label %cleanup via.catchswitch: invoke void @f() - to label %exit unwind label %dispatch.inner + to label %exit unwind label %dispatch.inner dispatch.inner: %cs.inner = catchswitch within none [label %pad.inner] unwind label %exn.dispatch - ; CHECK: %cs.inner = catchswitch within none [label %pad.inner] unwind label %cleanup pad.inner: %catch.inner = catchpad within %cs.inner [i32 0] - ; CHECK: %catch.inner = catchpad within %cs.inner call void @llvm.foo(i32 1) catchret from %catch.inner to label %exit exn.dispatch: %cs = catchswitch within none [label %pad1, label %pad2] unwind label %cleanup - ; CHECK-NOT: catchswitch within - ; CHECK-NOT: catchpad pad1: catchpad within %cs [i32 1] unreachable diff --git a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll 
index b277cb6cf4f9a..e437f40cbe753 100644 --- a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll +++ b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll @@ -10,11 +10,8 @@ define void @test1(i32 %x) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 -; CHECK-NEXT: unreachable -; CHECK: return: +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) ; CHECK-NEXT: ret void ; entry: @@ -34,11 +31,8 @@ define void @test1_no_null_opt(i32 %x) nounwind #0 { ; CHECK-LABEL: @test1_no_null_opt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 -; CHECK-NEXT: unreachable -; CHECK: return: +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) ; CHECK-NEXT: ret void ; entry: @@ -127,11 +121,8 @@ F: define void @test5(i1 %C, i32* %P) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg volatile i32* [[P:%.*]], i32 0, i32 1 seq_cst seq_cst, align 4 -; CHECK-NEXT: unreachable -; CHECK: F: +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -147,11 +138,8 @@ F: define void @test6(i1 %C, i32* %P) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg i32* [[P:%.*]], i32 0 seq_cst, align 4 -; CHECK-NEXT: unreachable -; CHECK: F: +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/wineh-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/wineh-unreachable.ll index c951294233ffa..0b5e97bc96071 100644 --- a/llvm/test/Transforms/SimplifyCFG/wineh-unreachable.ll +++ b/llvm/test/Transforms/SimplifyCFG/wineh-unreachable.ll @@ -1,14 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s declare void @Personality() declare void @f() -; CHECK-LABEL: define void @test1() define void @test1() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret void +; entry: - ; CHECK: call void @f() invoke void @f() - to label %exit unwind label %unreachable.unwind + to label %exit unwind label %unreachable.unwind exit: ret void unreachable.unwind: @@ -16,20 +20,28 @@ unreachable.unwind: unreachable } -; CHECK-LABEL: define void @test2() define void @test2() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch.body] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [] +; CHECK-NEXT: call void @f() +; 
CHECK-NEXT: catchret from [[CATCH]] to label [[UNREACHABLE:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: unreachable: +; CHECK-NEXT: unreachable +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body] unwind label %unreachable.unwind - ; CHECK: catch.pad: - ; CHECK-NEXT: catchswitch within none [label %catch.body] unwind to caller catch.body: - ; CHECK: catch.body: - ; CHECK-NEXT: catchpad within %cs1 - ; CHECK-NEXT: call void @f() - ; CHECK-NEXT: unreachable %catch = catchpad within %cs1 [] call void @f() catchret from %catch to label %unreachable @@ -42,18 +54,25 @@ unreachable: unreachable } -; CHECK-LABEL: define void @test3() define void @test3() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CLEANUP_PAD:%.*]] +; CHECK: cleanup.pad: +; CHECK-NEXT: [[CLEANUP:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @f() +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %cleanup.pad + to label %exit unwind label %cleanup.pad cleanup.pad: - ; CHECK: %cleanup = cleanuppad within none [] - ; CHECK-NEXT: call void @f() - ; CHECK-NEXT: unreachable %cleanup = cleanuppad within none [] invoke void @f() - to label %cleanup.ret unwind label %unreachable.unwind + to label %cleanup.ret unwind label %unreachable.unwind cleanup.ret: ; This cleanupret should be rewritten to unreachable, ; and merged into the pred block. @@ -65,11 +84,22 @@ unreachable.unwind: unreachable } -; CHECK-LABEL: define void @test5() define void @test5() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch.body] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [] +; CHECK-NEXT: catchret from [[CATCH]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body] unwind to caller @@ -82,15 +112,25 @@ exit: unreachable } -; CHECK-LABEL: define void @test6() define void @test6() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch.body] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [i8* null, i32 0, i8* null] +; CHECK-NEXT: catchret from [[CATCH]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body, label %catch.body] unwind to caller - ; CHECK: catchswitch within none [label %catch.body] unwind to caller catch.body: %catch = catchpad within %cs1 [i8* null, i32 0, i8* null] @@ -100,15 +140,25 @@ exit: ret void } -; CHECK-LABEL: define void @test7() define void @test7() personality i8* bitcast 
(void ()* @Personality to i8*) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch.body] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [i8* null, i32 0, i8* null] +; CHECK-NEXT: catchret from [[CATCH]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body, label %catch.body2] unwind to caller - ; CHECK: catchswitch within none [label %catch.body] unwind to caller catch.body: %catch = catchpad within %cs1 [i8* null, i32 0, i8* null] @@ -122,15 +172,25 @@ exit: ret void } -; CHECK-LABEL: define void @test8() define void @test8() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch.body] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [i8* null, i32 0, i8* null] +; CHECK-NEXT: catchret from [[CATCH]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body, label %catch.body2] unwind to caller - ; CHECK: catchswitch within none [label %catch.body] unwind to caller catch.body2: %catch2 = catchpad within %cs1 [i8* null, i32 0, i8* null] @@ -144,15 +204,28 @@ exit: ret void } -; CHECK-LABEL: define void @test9() define void @test9() personality i8* bitcast (void ()* @Personality to i8*) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[CATCH_PAD:%.*]] +; CHECK: catch.pad: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label [[CATCH_BODY:%.*]], label %catch.body2] unwind to caller +; CHECK: catch.body: +; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS1]] [i8* null, i32 0, i8* null] +; CHECK-NEXT: catchret from [[CATCH]] to label [[EXIT]] +; CHECK: catch.body2: +; CHECK-NEXT: [[CATCH2:%.*]] = catchpad within [[CS1]] [i8* null, i32 64, i8* null] +; CHECK-NEXT: catchret from [[CATCH2]] to label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: invoke void @f() - to label %exit unwind label %catch.pad + to label %exit unwind label %catch.pad catch.pad: %cs1 = catchswitch within none [label %catch.body, label %catch.body2] unwind to caller - ; CHECK: catchswitch within none [label %catch.body, label %catch.body2] unwind to caller catch.body: %catch = catchpad within %cs1 [i8* null, i32 0, i8* null] diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h new file mode 100644 index 0000000000000..d3c5217eace07 --- /dev/null +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -0,0 +1,30 @@ +//===-- mlir-c/Dialect/LLVM.h - C API for LLVM --------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_C_DIALECT_LLVM_H +#define MLIR_C_DIALECT_LLVM_H + +#include "mlir-c/IR.h" +#include "mlir-c/Registration.h" + +#ifdef __cplusplus +extern "C" { +#endif + +MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(LLVM, llvm); + +/// Creates an llvm.ptr type. +MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirType pointee, + unsigned addressSpace); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_DIALECT_LLVM_H diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 4141c68a5e379..d7496b30a1c3f 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -20,34 +20,33 @@ namespace mlir { namespace sparse_tensor { -/// Tensor expression kind. -enum class Kind { kTensor, kInvariant, kMulF, kMulI, kAddF, kAddI }; - /// Dimension level type for a tensor (undef means index does not appear). enum class Dim { kSparse, kDense, kSingle, kUndef }; -/// Children expressions of a binary TensorExp. +/// Tensor expression kind. +enum class Kind { + // Leaf. + kTensor, + kInvariant, + kZero, + // Operation. + kMulF, + kMulI, + kAddF, + kAddI, + kSubF, + kSubI +}; + +/// Children subexpressions of tensor operations. struct Children { unsigned e0; unsigned e1; }; /// Tensor expression. Represents a MLIR expression in tensor index notation. -/// For tensors, e0 denotes the tensor index. For invariants, the IR value is -/// stored directly. For binary operations, e0 and e1 denote the index of the -/// children tensor expressions. struct TensorExp { - TensorExp(Kind k, unsigned x, unsigned y, Value v) : kind(k), val(v) { - assert((kind == Kind::kTensor && x != -1u && y == -1u && !val) || - (kind == Kind::kInvariant && x == -1u && y == -1u && val) || - (kind >= Kind::kMulF && x != -1u && y != -1u && !val)); - if (kind == Kind::kTensor) { - tensor = x; - } else if (kind >= Kind::kMulF) { - children.e0 = x; - children.e1 = y; - } - } + TensorExp(Kind k, unsigned x, unsigned y, Value v); /// Tensor expression kind. Kind kind; @@ -56,7 +55,7 @@ struct TensorExp { /// Expressions representing tensors simply have a tensor number. unsigned tensor; - /// Binary operations hold the indices of their child expressions. + /// Tensor operations hold the indices of their children. Children children; }; @@ -69,10 +68,8 @@ struct TensorExp { /// loop indices (encoded in a bitvector) and the index of the corresponding /// tensor expression. struct LatPoint { - LatPoint(unsigned n, unsigned e, unsigned b) : bits(n, false), exp(e) { - bits.set(b); - } - LatPoint(const llvm::BitVector &b, unsigned e) : bits(b), exp(e) {} + LatPoint(unsigned n, unsigned e, unsigned b); + LatPoint(const llvm::BitVector &b, unsigned e); /// Conjunction of tensor loop indices as bitvector. This represents /// all indices involved in the tensor expression @@ -103,7 +100,8 @@ class Merger { dims(t + 1, std::vector(l, Dim::kUndef)) {} /// Adds a tensor expression. Returns its index. - unsigned addExp(Kind k, unsigned e0, unsigned e1 = -1u, Value v = Value()); + unsigned addExp(Kind k, unsigned e0 = -1u, unsigned e1 = -1u, + Value v = Value()); unsigned addExp(Kind k, Value v) { return addExp(k, -1u, -1u, v); } /// Adds an iteration lattice point. Returns its index. @@ -126,6 +124,12 @@ class Merger { /// Returns the index of the new set. 
unsigned takeDisj(Kind kind, unsigned s0, unsigned s1); + /// Maps a zero operand over a lattice set, i.e. each lattice point on an + /// expression E is simply copied over, but with 0 OP E as new expression. + /// This is useful to deal with disjunctive, but non-commutative operators. + /// Returns the index of the new set. + unsigned mapZero(Kind kind, unsigned s0); + /// Optimizes the iteration lattice points in the given set. This /// method should be called right before code generation to avoid /// generating redundant loops and conditions. @@ -135,7 +139,7 @@ class Merger { /// within the given set using just two basic rules: /// (1) multiple dense conditions are reduced to single dense, and /// (2) a *singleton* sparse/dense is reduced to sparse/random access. - llvm::BitVector simplifyCond(unsigned s, unsigned p0); + llvm::BitVector simplifyCond(unsigned s0, unsigned p0); /// Returns true if Li > Lj. bool latGT(unsigned i, unsigned j) const; diff --git a/mlir/lib/CAPI/Dialect/CMakeLists.txt b/mlir/lib/CAPI/Dialect/CMakeLists.txt index 053fce30d5ddc..ab8ac73c7c53b 100644 --- a/mlir/lib/CAPI/Dialect/CMakeLists.txt +++ b/mlir/lib/CAPI/Dialect/CMakeLists.txt @@ -27,6 +27,15 @@ add_mlir_public_c_api_library(MLIRCAPIGPU MLIRPass ) +add_mlir_public_c_api_library(MLIRCAPILLVM + LLVM.cpp + + PARTIAL_SOURCES_INTENDED + LINK_LIBS PUBLIC + MLIRCAPIIR + MLIRLLVMIR +) + add_mlir_public_c_api_library(MLIRCAPILinalg Linalg.cpp LinalgPasses.cpp diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp new file mode 100644 index 0000000000000..be0e6c5d0ca57 --- /dev/null +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -0,0 +1,21 @@ +//===- LLVM.cpp - C Interface for LLVM dialect ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Dialect/LLVM.h" +#include "mlir/CAPI/Registration.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" + +using namespace mlir; +using namespace mlir::LLVM; + +MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(LLVM, llvm, LLVMDialect) + +MlirType mlirLLVMPointerTypeGet(MlirType pointee, unsigned addressSpace) { + return wrap(LLVMPointerType::get(unwrap(pointee), addressSpace)); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 813fe683ae619..775f3a140f823 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -624,24 +624,36 @@ static void genReductionEnd(Merger &merger, CodeGen &codegen, /// Recursively generates tensor expression. 
static Value genExp(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned exp) { + Location loc = op.getLoc(); if (merger.exp(exp).kind == Kind::kTensor) return genTensorLoad(merger, codegen, rewriter, op, exp); - else if (merger.exp(exp).kind == Kind::kInvariant) + if (merger.exp(exp).kind == Kind::kInvariant) + return genInvariantValue(merger, codegen, rewriter, exp); + if (merger.exp(exp).kind == Kind::kZero) { + Type tp = op.getOutputTensorTypes()[0].getElementType(); + merger.exp(exp).val = + rewriter.create<ConstantOp>(loc, tp, rewriter.getZeroAttr(tp)); return genInvariantValue(merger, codegen, rewriter, exp); + } Value v0 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0); Value v1 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1); switch (merger.exp(exp).kind) { case Kind::kTensor: case Kind::kInvariant: + case Kind::kZero: llvm_unreachable("handled above"); case Kind::kMulF: - return rewriter.create<MulFOp>(op.getLoc(), v0, v1); + return rewriter.create<MulFOp>(loc, v0, v1); case Kind::kMulI: - return rewriter.create<MulIOp>(op.getLoc(), v0, v1); + return rewriter.create<MulIOp>(loc, v0, v1); case Kind::kAddF: - return rewriter.create<AddFOp>(op.getLoc(), v0, v1); + return rewriter.create<AddFOp>(loc, v0, v1); case Kind::kAddI: - return rewriter.create<AddIOp>(op.getLoc(), v0, v1); + return rewriter.create<AddIOp>(loc, v0, v1); + case Kind::kSubF: + return rewriter.create<SubFOp>(loc, v0, v1); + case Kind::kSubI: + return rewriter.create<SubIOp>(loc, v0, v1); } llvm_unreachable("unexpected expression kind"); } @@ -671,7 +683,8 @@ static void genInvariants(Merger &merger, CodeGen &codegen, merger.exp(exp).val = hoist ? genTensorLoad(merger, codegen, rewriter, op, exp) : Value(); } - } else if (merger.exp(exp).kind != Kind::kInvariant) { + } else if (merger.exp(exp).kind != Kind::kInvariant && + merger.exp(exp).kind != Kind::kZero) { // Traverse into the binary operations. Note that we only hoist // tensor loads, since subsequent MLIR/LLVM passes know how to // deal with all other kinds of derived loop invariants. diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 6150c15a0ad18..2a1ad9ad56df2 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -14,6 +14,39 @@ namespace mlir { namespace sparse_tensor { +// +// Constructors. +// + +TensorExp::TensorExp(Kind k, unsigned x, unsigned y, Value v) + : kind(k), val(v) { + switch (kind) { + case Kind::kTensor: + assert(x != -1u && y == -1u && !v); + tensor = x; + break; + case Kind::kInvariant: + assert(x == -1u && y == -1u && v); + break; + case Kind::kZero: + assert(x == -1u && y == -1u && !v); + break; + default: + assert(x != -1u && y != -1u && !v); + children.e0 = x; + children.e1 = y; + break; + } +} + +LatPoint::LatPoint(unsigned n, unsigned e, unsigned b) + : bits(n, false), simple(), exp(e) { + bits.set(b); +} + +LatPoint::LatPoint(const llvm::BitVector &b, unsigned e) + : bits(b), simple(), exp(e) {} + // // Lattice methods. // @@ -56,13 +89,28 @@ unsigned Merger::takeConj(Kind kind, unsigned s0, unsigned s1) { unsigned Merger::takeDisj(Kind kind, unsigned s0, unsigned s1) { unsigned s = takeConj(kind, s0, s1); + // Followed by all in s0 and s1. 
for (unsigned p : latSets[s0]) latSets[s].push_back(p); + if (Kind::kSubF <= kind && kind <= Kind::kSubI) + s1 = mapZero(kind, s1); for (unsigned p : latSets[s1]) latSets[s].push_back(p); return s; } +unsigned Merger::mapZero(Kind kind, unsigned s0) { + assert(Kind::kSubF <= kind && kind <= Kind::kSubI); + unsigned s = addSet(); + unsigned z = addExp(Kind::kZero); + for (unsigned p : latSets[s0]) { + unsigned e = addExp(kind, z, latPoints[p].exp); + latPoints.push_back(LatPoint(latPoints[p].bits, e)); + latSets[s].push_back(latPoints.size() - 1); + } + return s; +} + unsigned Merger::optimizeSet(unsigned s0) { unsigned s = addSet(); assert(latSets[s0].size() != 0); @@ -93,11 +141,11 @@ unsigned Merger::optimizeSet(unsigned s0) { return s; } -llvm::BitVector Merger::simplifyCond(unsigned s, unsigned p0) { +llvm::BitVector Merger::simplifyCond(unsigned s0, unsigned p0) { // First determine if this lattice point is a *singleton*, i.e., // the last point in a lattice, no other is less than this one. bool isSingleton = true; - for (unsigned p1 : latSets[s]) { + for (unsigned p1 : latSets[s0]) { if (p0 != p1 && latGT(p0, p1)) { isSingleton = false; break; @@ -148,6 +196,23 @@ bool Merger::hasAnyDimOf(const llvm::BitVector &bits, Dim d) const { // Print methods (for debugging). // +static char kindToOpSymbol(Kind kind) { + switch (kind) { + case Kind::kMulF: + case Kind::kMulI: + return '*'; + case Kind::kAddF: + case Kind::kAddI: + return '+'; + case Kind::kSubF: + case Kind::kSubI: + return '-'; + default: + break; + } + llvm_unreachable("unexpected kind"); +} + void Merger::dumpExp(unsigned e) const { switch (tensorExps[e].kind) { case Kind::kTensor: @@ -160,22 +225,15 @@ void Merger::dumpExp(unsigned e) const { case Kind::kInvariant: llvm::dbgs() << "invariant"; break; - default: - case Kind::kMulI: - llvm::dbgs() << "("; - dumpExp(tensorExps[e].children.e0); - llvm::dbgs() << " * "; - dumpExp(tensorExps[e].children.e1); - llvm::dbgs() << ")"; + case Kind::kZero: + llvm::dbgs() << "zero"; break; - case Kind::kAddF: - case Kind::kAddI: + default: llvm::dbgs() << "("; dumpExp(tensorExps[e].children.e0); - llvm::dbgs() << " + "; + llvm::dbgs() << " " << kindToOpSymbol(tensorExps[e].kind) << " "; dumpExp(tensorExps[e].children.e1); llvm::dbgs() << ")"; - break; } } @@ -184,7 +242,7 @@ void Merger::dumpLat(unsigned p) const { dumpBits(latPoints[p].bits); llvm::dbgs() << " :"; dumpBits(latPoints[p].simple); - llvm::dbgs() << " / "; + llvm::dbgs() << " : "; dumpExp(latPoints[p].exp); llvm::dbgs() << " )\n"; } @@ -230,28 +288,34 @@ void Merger::dumpBits(const llvm::BitVector &bits) const { unsigned Merger::buildLattices(unsigned e, unsigned idx) { Kind kind = tensorExps[e].kind; - if (kind == Kind::kTensor || kind == Kind::kInvariant) { + switch (kind) { + case Kind::kTensor: + case Kind::kInvariant: + case Kind::kZero: { // Either the index is really used in the tensor expression, or it is // set to the undefined index in that dimension. An invariant expression // is set to a synthetic tensor with undefined indices only. unsigned s = addSet(); - unsigned t = - kind == Kind::kTensor ? tensorExps[e].children.e0 : syntheticTensor; + unsigned t = kind == Kind::kTensor ? 
tensorExps[e].tensor : syntheticTensor; latSets[s].push_back(addLat(t, idx, e)); return s; } - unsigned s0 = buildLattices(tensorExps[e].children.e0, idx); - unsigned s1 = buildLattices(tensorExps[e].children.e1, idx); - switch (kind) { - case Kind::kTensor: - case Kind::kInvariant: - llvm_unreachable("handled above"); case Kind::kMulF: case Kind::kMulI: - return takeConj(kind, s0, s1); + return takeConj(kind, // take binary conjunction + buildLattices(tensorExps[e].children.e0, idx), + buildLattices(tensorExps[e].children.e1, idx)); + case Kind::kSubF: + case Kind::kSubI: + if (tensorExps[tensorExps[e].children.e0].kind == Kind::kZero) + return mapZero(kind, // maps to 0-y with just y's lattices + buildLattices(tensorExps[e].children.e1, idx)); + LLVM_FALLTHROUGH; case Kind::kAddF: case Kind::kAddI: - return takeDisj(kind, s0, s1); + return takeDisj(kind, // take binary disjunction + buildLattices(tensorExps[e].children.e0, idx), + buildLattices(tensorExps[e].children.e1, idx)); } llvm_unreachable("unexpected expression kind"); } @@ -281,7 +345,18 @@ Optional<unsigned> Merger::buildTensorExp(linalg::GenericOp op, Value val) { Operation *def = val.getDefiningOp(); if (def->getBlock() != &op.region().front()) return addExp(Kind::kInvariant, val); - // Construct binary operations if subexpressions could be built. + // Construct unary operations if subexpression can be built. + if (def->getNumOperands() == 1) { + auto x = buildTensorExp(op, def->getOperand(0)); + if (x.hasValue()) { + unsigned e0 = addExp(Kind::kZero); + unsigned e1 = x.getValue(); + if (isa<NegFOp>(def)) + return addExp(Kind::kSubF, e0, e1); + // TODO: no negi in std? + } + } + // Construct binary operations if subexpressions can be built. if (def->getNumOperands() == 2) { auto x = buildTensorExp(op, def->getOperand(0)); auto y = buildTensorExp(op, def->getOperand(1)); @@ -296,6 +371,10 @@ Optional<unsigned> Merger::buildTensorExp(linalg::GenericOp op, Value val) { return addExp(Kind::kAddF, e0, e1); if (isa<AddIOp>(def)) return addExp(Kind::kAddI, e0, e1); + if (isa<SubFOp>(def)) + return addExp(Kind::kSubF, e0, e1); + if (isa<SubIOp>(def)) + return addExp(Kind::kSubI, e0, e1); } } // Cannot build. diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 1a7d2e80d56f7..3342f9a7482bd 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1816,6 +1816,72 @@ ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, return success(); } +namespace { +struct IteratorType { + IteratorType(StringRef strRef) : strRef(strRef) {} + bool isOfType(Attribute attr) const { + auto sAttr = attr.dyn_cast<StringAttr>(); + return sAttr && sAttr.getValue() == strRef; + } + StringRef strRef; +}; +struct Par : public IteratorType { + Par() : IteratorType(getParallelIteratorTypeName()) {} +}; +struct Red : public IteratorType { + Red() : IteratorType(getReductionIteratorTypeName()) {} +}; + +// Unroll outer-products along reduction. 
+struct UnrolledOuterProductEmitter { + using MapList = ArrayRef<ArrayRef<AffineExpr>>; + + UnrolledOuterProductEmitter(PatternRewriter &rewriter, + vector::ContractionOp op) + : rewriter(rewriter), loc(op.getLoc()), kind(op.kind()), + iterators(op.iterator_types()), maps(op.getIndexingMaps()), op(op) {} + + Value t(Value v) { + static constexpr std::array<int64_t, 2> perm = {1, 0}; + return rewriter.create<vector::TransposeOp>(loc, v, perm); + } + + bool iters(ArrayRef<IteratorType> its) { + if (its.size() != iterators.size()) + return false; + for (int i = 0, e = its.size(); i != e; ++i) { + if (!its[i].isOfType(iterators[i])) + return false; + } + return true; + } + + bool layout(MapList l) { + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + return maps == infer(l); + } + + LogicalResult outer_prod(Value lhs, Value rhs, Value res, int reductionSize) { + assert(reductionSize > 0); + for (int64_t k = 0; k < reductionSize; ++k) { + Value a = rewriter.create<vector::ExtractOp>(loc, lhs, k); + Value b = rewriter.create<vector::ExtractOp>(loc, rhs, k); + res = rewriter.create<vector::OuterProductOp>(loc, res.getType(), a, b, + res, kind); + } + rewriter.replaceOp(op, res); + return success(); + } + + PatternRewriter &rewriter; + Location loc; + vector::CombiningKind kind; + ArrayAttr iterators; + SmallVector<AffineMap, 4> maps; + Operation *op; +}; +} // namespace + /// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul /// semantics to a reduction_size-unrolled sequence: /// ``` @@ -1844,104 +1910,68 @@ LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite( if (failed(filter(op))) return failure(); - Location loc = op.getLoc(); - int64_t reductionSize = 0; VectorType lhsType = op.getLhsType(); Value lhs = op.lhs(), rhs = op.rhs(), res = op.acc(); // Set up the parallel/reduction structure in right form. - using MapList = ArrayRef<ArrayRef<AffineExpr>>; - auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; AffineExpr m, n, k; bindDims(rewriter.getContext(), m, n, k); - static constexpr std::array<int64_t, 2> perm = {1, 0}; - auto iteratorTypes = op.iterator_types().getValue(); - SmallVector<AffineMap, 4> maps = op.getIndexingMaps(); - if (isParallelIterator(iteratorTypes[0]) && - isParallelIterator(iteratorTypes[1]) && - isReductionIterator(iteratorTypes[2])) { - // - // Two outer parallel, one inner reduction (matmat flavor). - // - if (maps == infer({{m, k}, {k, n}, {m, n}})) { - // This is the classical row-major matmul. Just permute the lhs. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create<vector::TransposeOp>(loc, lhs, perm); - } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { - // TODO: may be better to fail and use some vector -> scalar reduction. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create<vector::TransposeOp>(loc, lhs, perm); - rhs = rewriter.create<vector::TransposeOp>(loc, rhs, perm); - } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { - // No need to permute anything. - reductionSize = lhsType.getDimSize(0); - } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { - // Just permute the rhs. - reductionSize = lhsType.getDimSize(0); - rhs = rewriter.create<vector::TransposeOp>(loc, rhs, perm); - } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { - // This is the classical row-major matmul. Just permute the lhs. - reductionSize = lhsType.getDimSize(1); - Value tmp = rhs; - rhs = rewriter.create<vector::TransposeOp>(loc, lhs, perm); - lhs = tmp; - } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { - // TODO: may be better to fail and use some vector -> scalar reduction. 
- reductionSize = lhsType.getDimSize(1); - Value tmp = rhs; - rhs = rewriter.create(loc, lhs, perm); - lhs = rewriter.create(loc, tmp, perm); - } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { - // No need to permute anything, but still swap lhs and rhs. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { - // Just permute the rhs. - reductionSize = lhsType.getDimSize(0); - Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = tmp; - } else { - return failure(); + + // + // Two outer parallel, one inner reduction (matmat flavor). + // + UnrolledOuterProductEmitter e(rewriter, op); + if (e.iters({Par(), Par(), Red()})) { + // Classical row-major matmul: Just permute the lhs. + if (e.layout({{m, k}, {k, n}, {m, n}})) + return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1)); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (e.layout({{m, k}, {n, k}, {m, n}})) { + Value tlhs = e.t(lhs); + return e.outer_prod(tlhs, e.t(rhs), res, lhsType.getDimSize(1)); } - } else if (isParallelIterator(iteratorTypes[0]) && - isReductionIterator(iteratorTypes[1])) { - // - // One outer parallel, one inner reduction (matvec flavor) - // - if (maps == infer({{m, n}, {n}, {m}})) { - // Case mat-vec: transpose. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n, m}, {n}, {m}})) { - // Case mat-trans-vec: ready to go. - reductionSize = lhsType.getDimSize(0); - } else if (maps == infer({{n}, {m, n}, {m}})) { - // Case vec-mat: swap and transpose. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n}, {n, m}, {m}})) { - // Case vec-mat-trans: swap and ready to go. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - } else { - return failure(); + // No need to permute anything. + if (e.layout({{k, m}, {k, n}, {m, n}})) + return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0)); + // Just permute the rhs. + if (e.layout({{k, m}, {n, k}, {m, n}})) + return e.outer_prod(lhs, e.t(rhs), res, lhsType.getDimSize(0)); + // Transposed output: swap RHS and LHS. + // Classical row-major matmul: permute the lhs. + if (e.layout({{m, k}, {k, n}, {n, m}})) + return e.outer_prod(rhs, e.t(lhs), res, lhsType.getDimSize(1)); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (e.layout({{m, k}, {n, k}, {n, m}})) { + Value trhs = e.t(rhs); + return e.outer_prod(trhs, e.t(lhs), res, lhsType.getDimSize(1)); } - } else { + if (e.layout({{k, m}, {k, n}, {n, m}})) + return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0)); + if (e.layout({{k, m}, {n, k}, {n, m}})) + return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0)); return failure(); } - assert(reductionSize > 0); - - // Unroll outer-products along reduction. - for (int64_t k = 0; k < reductionSize; ++k) { - Value a = rewriter.create(op.getLoc(), lhs, k); - Value b = rewriter.create(op.getLoc(), rhs, k); - res = rewriter.create(op.getLoc(), res.getType(), a, - b, res, op.kind()); + + // + // One outer parallel, one inner reduction (matvec flavor) + // + if (e.iters({Par(), Red()})) { + // Case mat-vec: transpose. + if (e.layout({{m, n}, {n}, {m}})) + return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1)); + // Case mat-trans-vec: ready to go. 
+ if (e.layout({{n, m}, {n}, {m}})) + return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0)); + // Case vec-mat: swap and transpose. + if (e.layout({{n}, {m, n}, {m}})) + return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0)); + // Case vec-mat-trans: swap and ready to go. + if (e.layout({{n}, {n, m}, {m}})) + return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0)); + return failure(); } - rewriter.replaceOp(op, res); - return success(); + + return failure(); } LogicalResult diff --git a/mlir/test/CAPI/CMakeLists.txt b/mlir/test/CAPI/CMakeLists.txt index 6e377e5b987c5..a0a812936a99f 100644 --- a/mlir/test/CAPI/CMakeLists.txt +++ b/mlir/test/CAPI/CMakeLists.txt @@ -25,6 +25,10 @@ _add_capi_test_executable(mlir-capi-ir-test ir.c ) +_add_capi_test_executable(mlir-capi-llvm-test + llvm.c +) + _add_capi_test_executable(mlir-capi-pass-test pass.c ) diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c new file mode 100644 index 0000000000000..bbabb6f18898a --- /dev/null +++ b/mlir/test/CAPI/llvm.c @@ -0,0 +1,48 @@ +//===- llvm.c - Test of llvm APIs -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: mlir-capi-llvm-test 2>&1 | FileCheck %s + +#include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/IR.h" +#include "mlir-c/BuiltinTypes.h" + +#include <assert.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// CHECK-LABEL: testTypeCreation() +static void testTypeCreation(MlirContext ctx) { + fprintf(stderr, "testTypeCreation()\n"); + MlirType i32 = mlirIntegerTypeGet(ctx, 32); + + const char *i32p_text = "!llvm.ptr<i32>"; + MlirType i32p = mlirLLVMPointerTypeGet(i32, 0); + MlirType i32p_ref = mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(i32p_text)); + // CHECK: !llvm.ptr<i32>: 1 + fprintf(stderr, "%s: %d\n", i32p_text, mlirTypeEqual(i32p, i32p_ref)); + + const char *i32p4_text = "!llvm.ptr<i32, 4>"; + MlirType i32p4 = mlirLLVMPointerTypeGet(i32, 4); + MlirType i32p4_ref = mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(i32p4_text)); + // CHECK: !llvm.ptr<i32, 4>: 1 + fprintf(stderr, "%s: %d\n", i32p4_text, mlirTypeEqual(i32p4, i32p4_ref)); +} + +int main() { + MlirContext ctx = mlirContextCreate(); + mlirDialectHandleRegisterDialect(mlirGetDialectHandle__llvm__(), ctx); + mlirContextGetOrLoadDialect(ctx, mlirStringRefCreateFromCString("llvm")); + testTypeCreation(ctx); + mlirContextDestroy(ctx); + return 0; +} + diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index 416cfee7efade..1340ec3f786e2 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -66,6 +66,7 @@ set(MLIR_TEST_DEPENDS FileCheck count not mlir-capi-execution-engine-test mlir-capi-ir-test + mlir-capi-llvm-test mlir-capi-pass-test mlir-capi-sparse-tensor-test mlir-cpu-runner diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir new file mode 100644 index 0000000000000..86a009183c057 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -0,0 +1,215 @@ +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: mlir-opt %s -sparsification | FileCheck %s + +#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> + +#trait1 = { + indexing_maps = [ + affine_map<(i) -> (i)>, // a + affine_map<(i) -> (i)> // x 
(out)
+  ],
+  iterator_types = ["parallel"],
+  doc = "x(i) = OP a(i)"
+}
+
+#trait2 = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> (i)>,  // b
+    affine_map<(i) -> (i)>   // x (out)
+  ],
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) OP b(i)"
+}
+
+// CHECK-LABEL: func @neg(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+// CHECK: %[[VAL_2:.*]] = constant 0 : index
+// CHECK: %[[VAL_3:.*]] = constant 1 : index
+// CHECK: %[[VAL_4:.*]] = constant 0.000000e+00 : f64
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
+// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] {
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf64>
+// CHECK: %[[VAL_14:.*]] = subf %[[VAL_4]], %[[VAL_13]] : f64
+// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf64>
+// CHECK: }
+// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf64>
+// CHECK: return %[[VAL_15]] : tensor<32xf64>
+// CHECK: }
+func @neg(%arga: tensor<32xf64, #SV>,
+          %argx: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+  %0 = linalg.generic #trait1
+     ins(%arga: tensor<32xf64, #SV>)
+    outs(%argx: tensor<32xf64>) {
+      ^bb(%a: f64, %x: f64):
+        %0 = negf %a : f64
+        linalg.yield %0 : f64
+  } -> tensor<32xf64>
+  return %0 : tensor<32xf64>
+}
+
+// CHECK-LABEL: func @add(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+// CHECK: %[[VAL_3:.*]] = constant 32 : index
+// CHECK: %[[VAL_4:.*]] = constant 0 : index
+// CHECK: %[[VAL_5:.*]] = constant true
+// CHECK: %[[VAL_6:.*]] = constant 1 : index
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
+// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_17:.*]] = cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
+// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
+// CHECK: scf.if %[[VAL_21]] {
+// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xf64>
+// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xf64>
+// CHECK: %[[VAL_24:.*]] = addf %[[VAL_22]], %[[VAL_23]] : f64
+// CHECK: memref.store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf64>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_5]] {
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xf64>
+// CHECK: memref.store %[[VAL_25]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf64>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_26:.*]] = cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
+// CHECK: %[[VAL_27:.*]] = addi %[[VAL_18]], %[[VAL_6]] : index
+// CHECK: %[[VAL_28:.*]] = select %[[VAL_26]], %[[VAL_27]], %[[VAL_18]] : index
+// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
+// CHECK: scf.yield %[[VAL_28]], %[[VAL_29]] : index, index
+// CHECK: }
+// CHECK: scf.for %[[VAL_30:.*]] = %[[VAL_31:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
+// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xf64>
+// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xf64>
+// CHECK: }
+// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
+// CHECK: return %[[VAL_33]] : tensor<32xf64>
+// CHECK: }
+func @add(%arga: tensor<32xf64, #SV>,
+          %argb: tensor<32xf64>,
+          %argx: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>)
+    outs(%argx: tensor<32xf64>) {
+      ^bb(%a: f64, %b: f64, %x: f64):
+        %0 = addf %a, %b : f64
+        linalg.yield %0 : f64
+  } -> tensor<32xf64>
+  return %0 : tensor<32xf64>
+}
+
+// CHECK-LABEL: func @sub(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+// CHECK: %[[VAL_3:.*]] = constant 32 : index
+// CHECK: %[[VAL_4:.*]] = constant 0 : index
+// CHECK: %[[VAL_5:.*]] = constant true
+// CHECK: %[[VAL_6:.*]] = constant 1 : index
+// CHECK: %[[VAL_7:.*]] = constant 0.000000e+00 : f64
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
+// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_18:.*]] = cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
+// CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index):
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
+// CHECK: scf.if %[[VAL_22]] {
+// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<?xf64>
+// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<32xf64>
+// CHECK: %[[VAL_25:.*]] = subf %[[VAL_23]], %[[VAL_24]] : f64
+// CHECK: memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf64>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_5]] {
+// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<32xf64>
+// CHECK: %[[VAL_27:.*]] = subf %[[VAL_7]], %[[VAL_26]] : f64
+// CHECK: memref.store %[[VAL_27]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf64>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_28:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
+// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
+// CHECK: %[[VAL_30:.*]] = select %[[VAL_28]], %[[VAL_29]], %[[VAL_19]] : index
+// CHECK: %[[VAL_31:.*]] = addi %[[VAL_20]], %[[VAL_6]] : index
+// CHECK: scf.yield %[[VAL_30]], %[[VAL_31]] : index, index
+// CHECK: }
+// CHECK: scf.for %[[VAL_32:.*]] = %[[VAL_33:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
+// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref<32xf64>
+// CHECK: %[[VAL_35:.*]] = subf %[[VAL_7]], %[[VAL_34]] : f64
+// CHECK: memref.store %[[VAL_35]], %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<32xf64>
+// CHECK: }
+// CHECK: %[[VAL_36:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf64>
+// CHECK: return %[[VAL_36]] : tensor<32xf64>
+// CHECK: }
+func @sub(%arga: tensor<32xf64, #SV>,
+          %argb: tensor<32xf64>,
+          %argx: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>)
+    outs(%argx: tensor<32xf64>) {
+      ^bb(%a: f64, %b: f64, %x: f64):
+        %0 = subf %a, %b : f64
+        linalg.yield %0 : f64
+  } -> tensor<32xf64>
+  return %0 : tensor<32xf64>
+}
+
+// CHECK-LABEL: func @mul(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+// CHECK: %[[VAL_3:.*]] = constant 0 : index
+// CHECK: %[[VAL_4:.*]] = constant 1 : index
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
+// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
+// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf64>
+// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref<32xf64>
+// CHECK: %[[VAL_16:.*]] = mulf %[[VAL_14]], %[[VAL_15]] : f64
+// CHECK: memref.store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf64>
+// CHECK: }
+// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf64>
+// CHECK: return %[[VAL_17]] : tensor<32xf64>
+// CHECK: }
+func @mul(%arga: tensor<32xf64, #SV>,
+          %argb: tensor<32xf64>,
+          %argx: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>)
+    outs(%argx: tensor<32xf64>) {
+      ^bb(%a: f64, %b: f64, %x: f64):
+        %0 = mulf %a, %b : f64
+        linalg.yield %0 : f64
+  } -> tensor<32xf64>
+  return %0 : tensor<32xf64>
+}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir
new file mode 100644
index 0000000000000..f306b66240994
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+// RUN: mlir-opt %s -sparsification | FileCheck %s
+
+#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
+
+#trait2 = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> (i)>,  // b
+    affine_map<(i) -> (i)>   // x (out)
+  ],
+  iterator_types = ["parallel"],
+  doc = "x(i) = a(i) OP b(i)"
+}
+
+// CHECK-LABEL: func @add(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+// CHECK: %[[VAL_3:.*]] = constant 32 : index
+// CHECK: %[[VAL_4:.*]] = constant 0 : index
+// CHECK: %[[VAL_5:.*]] = constant true
+// CHECK: %[[VAL_6:.*]] = constant 1 : index
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
+// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_17:.*]] = cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
+// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
+// CHECK: scf.if %[[VAL_21]] {
+// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xi64>
+// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xi64>
+// CHECK: %[[VAL_24:.*]] = addi %[[VAL_22]], %[[VAL_23]] : i64
+// CHECK: memref.store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xi64>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_5]] {
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xi64>
+// CHECK: memref.store %[[VAL_25]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xi64>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_26:.*]] = cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
+// CHECK: %[[VAL_27:.*]] = addi %[[VAL_18]], %[[VAL_6]] : index
+// CHECK: %[[VAL_28:.*]] = select %[[VAL_26]], %[[VAL_27]], %[[VAL_18]] : index
+// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
+// CHECK: scf.yield %[[VAL_28]], %[[VAL_29]] : index, index
+// CHECK: }
+// CHECK: scf.for %[[VAL_30:.*]] = %[[VAL_31:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
+// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xi64>
+// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xi64>
+// CHECK: }
+// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xi64>
+// CHECK: return %[[VAL_33]] : tensor<32xi64>
+// CHECK: }
+func @add(%arga: tensor<32xi64, #SV>,
+          %argb: tensor<32xi64>,
+          %argx: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>)
+    outs(%argx: tensor<32xi64>) {
+      ^bb(%a: i64, %b: i64, %x: i64):
+        %0 = addi %a, %b : i64
+        linalg.yield %0 : i64
+  } -> tensor<32xi64>
+  return %0 : tensor<32xi64>
+}
+
+// CHECK-LABEL: func @sub(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+// CHECK: %[[VAL_3:.*]] = constant 32 : index
+// CHECK: %[[VAL_4:.*]] = constant 0 : index
+// CHECK: %[[VAL_5:.*]] = constant true
+// CHECK: %[[VAL_6:.*]] = constant 1 : index
+// CHECK: %[[VAL_7:.*]] = constant 0 : i64
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
+// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_18:.*]] = cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
+// CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index):
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
+// CHECK: scf.if %[[VAL_22]] {
+// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<?xi64>
+// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<32xi64>
+// CHECK: %[[VAL_25:.*]] = subi %[[VAL_23]], %[[VAL_24]] : i64
+// CHECK: memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xi64>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_5]] {
+// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<32xi64>
+// CHECK: %[[VAL_27:.*]] = subi %[[VAL_7]], %[[VAL_26]] : i64
+// CHECK: memref.store %[[VAL_27]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xi64>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_28:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
+// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
+// CHECK: %[[VAL_30:.*]] = select %[[VAL_28]], %[[VAL_29]], %[[VAL_19]] : index
+// CHECK: %[[VAL_31:.*]] = addi %[[VAL_20]], %[[VAL_6]] : index
+// CHECK: scf.yield %[[VAL_30]], %[[VAL_31]] : index, index
+// CHECK: }
+// CHECK: scf.for %[[VAL_32:.*]] = %[[VAL_33:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
+// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref<32xi64>
+// CHECK: %[[VAL_35:.*]] = subi %[[VAL_7]], %[[VAL_34]] : i64
+// CHECK: memref.store %[[VAL_35]], %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<32xi64>
+// CHECK: }
+// CHECK: %[[VAL_36:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xi64>
+// CHECK: return %[[VAL_36]] : tensor<32xi64>
+// CHECK: }
+func @sub(%arga: tensor<32xi64, #SV>,
+          %argb: tensor<32xi64>,
+          %argx: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>)
+    outs(%argx: tensor<32xi64>) {
+      ^bb(%a: i64, %b: i64, %x: i64):
+        %0 = subi %a, %b : i64
+        linalg.yield %0 : i64
+  } -> tensor<32xi64>
+  return %0 : tensor<32xi64>
+}
+
+// CHECK-LABEL: func @mul(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+// CHECK: %[[VAL_3:.*]] = constant 0 : index
+// CHECK: %[[VAL_4:.*]] = constant 1 : index
+// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
+// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
+// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xi64>
+// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref<32xi64>
+// CHECK: %[[VAL_16:.*]] = muli %[[VAL_14]], %[[VAL_15]] : i64
+// CHECK: memref.store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xi64>
+// CHECK: }
+// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xi64>
+// CHECK: return %[[VAL_17]] : tensor<32xi64>
+// CHECK: }
+func @mul(%arga: tensor<32xi64, #SV>,
+          %argb: tensor<32xi64>,
+          %argx: tensor<32xi64> {linalg.inplaceable = true}) -> tensor<32xi64> {
+  %0 = linalg.generic #trait2
+     ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>)
+    outs(%argx: tensor<32xi64>) {
+      ^bb(%a: i64, %b: i64, %x: i64):
+        %0 = muli %a, %b : i64
+        linalg.yield %0 : i64
+  } -> tensor<32xi64>
+  return %0 : tensor<32xi64>
+}