15 files changed, 504 insertions, 21 deletions
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
new file mode 100644
index 00000000000..dcbfaaa3d77
--- /dev/null
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -0,0 +1,64 @@
+; RUN: opt %s -deadargelim -S | FileCheck %s
+; PR14016
+
+; Check that debug info metadata for subprograms stores pointers to
+; updated LLVM functions.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = global i32 0, align 4
+
+define void @_Z3runv() uwtable {
+entry:
+  call void @_ZN12_GLOBAL__N_18dead_argEPv(i8* null), !dbg !10
+  call void (...)* @_ZN12_GLOBAL__N_111dead_varargEz(), !dbg !12
+  ret void, !dbg !13
+}
+
+; Argument will be deleted
+define internal void @_ZN12_GLOBAL__N_18dead_argEPv(i8* %foo) nounwind uwtable {
+entry:
+  %0 = load i32* @x, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* @x, align 4, !dbg !14
+  ret void, !dbg !16
+}
+
+; Vararg will be deleted
+define internal void @_ZN12_GLOBAL__N_111dead_varargEz(...) nounwind uwtable {
+entry:
+  %0 = load i32* @x, align 4, !dbg !17
+  %inc = add nsw i32 %0, 1, !dbg !17
+  store i32 %inc, i32* @x, align 4, !dbg !17
+  ret void, !dbg !19
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !8, metadata !9}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+
+; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
+
+!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+
+; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
+
+!10 = metadata !{i32 8, i32 14, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !5, i32 8, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!12 = metadata !{i32 8, i32 27, metadata !11, null}
+!13 = metadata !{i32 8, i32 42, metadata !11, null}
+!14 = metadata !{i32 4, i32 28, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !9, i32 4, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!16 = metadata !{i32 4, i32 33, metadata !15, null}
+!17 = metadata !{i32 5, i32 25, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!19 = metadata !{i32 5, i32 30, metadata !18, null}
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
new file mode 100644
index 00000000000..f84a7dd1aae
--- /dev/null
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; CHECK: @test1
+define void @test1(float %A, float %B, float* %PA, float* %PB) {
+  ; CHECK-NEXT: fadd
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: ret
+  %C = fadd float %A, %B
+  store float %C, float* %PA
+  %D = fadd float %B, %A
+  store float %D, float* %PB
+  ret void
+}
+
+; CHECK: @test2
+define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
+  ; CHECK-NEXT: fcmp
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: ret
+  %C = fcmp eq float %A, %B
+  store i1 %C, i1* %PA
+  %D = fcmp eq float %B, %A
+  store i1 %D, i1* %PB
+  ret void
+}
+
+; CHECK: @test3
+define void @test3(float %A, float %B, i1* %PA, i1* %PB) {
+  ; CHECK-NEXT: fcmp
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: ret
+  %C = fcmp uge float %A, %B
+  store i1 %C, i1* %PA
+  %D = fcmp ule float %B, %A
+  store i1 %D, i1* %PB
+  ret void
+}
+
+; CHECK: @test4
+define void @test4(i32 %A, i32 %B, i1* %PA, i1* %PB) {
+  ; CHECK-NEXT: icmp
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: ret
+  %C = icmp eq i32 %A, %B
+  store i1 %C, i1* %PA
+  %D = icmp eq i32 %B, %A
+  store i1 %D, i1* %PB
+  ret void
+}
+
+; CHECK: @test5
+define void @test5(i32 %A, i32 %B, i1* %PA, i1* %PB) {
+  ; CHECK-NEXT: icmp
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: store
+  ; CHECK-NEXT: ret
+  %C = icmp sgt i32 %A, %B
+  store i1 %C, i1* %PA
+  %D = icmp slt i32 %B, %A
+  store i1 %D, i1* %PB
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
index 708a961272b..0c88e83975c 100644
--- a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
+++ b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
@@ -39,11 +39,11 @@
 	%"struct.llvm::SymbolTable" = type opaque
 	%"struct.llvm::SymbolTableListTraits<llvm::Argument,llvm::Function,llvm::Function,llvm::ilist_traits<llvm::Argument> >" = type { %"struct.llvm::Function"*, %"struct.llvm::Function"* }
 	%"struct.llvm::SymbolTableListTraits<llvm::Instruction,llvm::BasicBlock,llvm::Function,llvm::ilist_traits<llvm::Instruction> >" = type { %"struct.llvm::Function"*, %"struct.llvm::BasicBlock"* }
-	%"struct.llvm::TargetData" = type { %"struct.llvm::FunctionPass", i1, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%"struct.llvm::DataLayout" = type { %"struct.llvm::FunctionPass", i1, i8, i8, i8, i8, i8, i8, i8, i8 }
 	%"struct.llvm::TargetFrameInfo" = type { i32 (...)**, i32, i32, i32 }
 	%"struct.llvm::TargetInstrDescriptor" = type { i8*, i32, i32, i32, i1, i32, i32, i32, i32, i32, i32*, i32* }
 	%"struct.llvm::TargetInstrInfo" = type { i32 (...)**, %"struct.llvm::TargetInstrDescriptor"*, i32, i32 }
-	%"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >", %"struct.llvm::TargetData", %"struct.llvm::IntrinsicLowering"* }
+	%"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >", %"struct.llvm::DataLayout", %"struct.llvm::IntrinsicLowering"* }
 	%"struct.llvm::TargetRegClassInfo" = type { i32 (...)**, i32, i32, i32 }
 	%"struct.llvm::TargetRegInfo" = type { i32 (...)**, %"struct.std::vector<const llvm::TargetRegClassInfo*,std::allocator<const llvm::TargetRegClassInfo*> >", %"struct.llvm::TargetMachine"* }
 	%"struct.llvm::Type" = type { %"struct.llvm::Value", i32, i32, i1, i32, %"struct.llvm::Type"*, %"struct.std::vector<llvm::PATypeHandle,std::allocator<llvm::PATypeHandle> >" }
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 3335be781df..62af42b9d68 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -87,3 +87,29 @@ entry:
 main.f.exit:                                      ; preds = %"3.i"
   unreachable
 }
+
+
+; PR13967
+
+define void @f() nounwind ssp {
+bb:
+  br label %bb4
+
+bb4:
+  %tmp = phi i64 [ %tmp5, %bb7 ], [ undef, %bb ]
+  %tmp5 = add nsw i64 %tmp, 1
+  %extract.t1 = trunc i64 %tmp5 to i32
+  br i1 false, label %bb6, label %bb7
+
+bb6:
+  br label %bb7
+
+bb7:
+  %.off0 = phi i32 [ undef, %bb6 ], [ %extract.t1, %bb4 ]
+  %tmp8 = icmp eq i32 %.off0, 0
+  br i1 %tmp8, label %bb9, label %bb4
+
+bb9:
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index 27916b98603..4ea1bd9beb3 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -58,3 +58,19 @@ define double @test2(double* %p, double %n) nounwind {
   store double %n, double* %p
   ret double %t
 }
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @use(i8*)
+
+%struct.s = type { i32, i32, i32, i32 }
+
+define void @test3(%struct.s* sret %a4) {
+; Check that the alignment is bumped up the alignment of the sret type.
+; CHECK: @test3
+  %a4.cast = bitcast %struct.s* %a4 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 4, i1 false)
+  call void @use(i8* %a4.cast)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 838c2f73fb9..40c44c09a8c 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -10,7 +10,7 @@ define zeroext i8 @t1(float %x, float %y) nounwind {
 ; CHECK: fcmp oeq float %x, %y
 ; CHECK-NOT: fcmp ueq float %x, %y
 ; CHECK-NOT: fcmp ord float %x, %y
-; CHECK-NOW: and
+; CHECK-NOT: and
 }
 
 define zeroext i8 @t2(float %x, float %y) nounwind {
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 61ba3c7e6cc..597b69dee3d 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -9,11 +9,11 @@ declare void @g(%a*)
 define float @f() {
 entry:
   %a_var = alloca %a
-  %b_var = alloca %b
+  %b_var = alloca %b, align 1
   call void @g(%a* %a_var)
   %a_i8 = bitcast %a* %a_var to i8*
   %b_i8 = bitcast %b* %b_var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 1, i1 false)
   %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
   %tmp2 = load float* %tmp1
   ret float %tmp2
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index b1f900d9da4..1b98f6ad383 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -1,12 +1,15 @@
-; RUN: opt < %s -S -memcpyopt | FileCheck %s
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte aligned store in the middle.
 
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
-
 define void @foo(i32* %p) {
+; CHECK: @foo
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
   %a0 = getelementptr i32* %p, i64 0
   store i32 0, i32* %a0, align 4
   %a1 = getelementptr i32* %p, i64 1
@@ -17,3 +20,18 @@ define void @foo(i32* %p) {
   store i32 0, i32* %a3, align 4
   ret void
 }
+
+; Replacing %a8 with %a4 in the memset requires boosting the alignment of %a4.
+
+define void @bar() {
+; CHECK: @bar
+; CHECK: %a4 = alloca i32, align 8
+; CHECK-NOT: memcpy
+  %a4 = alloca i32, align 4
+  %a8 = alloca i32, align 8
+  %a8.cast = bitcast i32* %a8 to i8*
+  %a4.cast = bitcast i32* %a4 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i32 4, i1 false)
+  ret void
+}
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
index 4f35a8a0ee3..945ad910021 100644
--- a/test/Transforms/SROA/alignment.ll
+++ b/test/Transforms/SROA/alignment.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
@@ -31,8 +31,8 @@ entry:
 define void @test2() {
 ; CHECK: @test2
 ; CHECK: alloca i16
-; CHECK: load i8* %{{.*}}, align 1
-; CHECK: store i8 42, i8* %{{.*}}, align 1
+; CHECK: load i8* %{{.*}}
+; CHECK: store i8 42, i8* %{{.*}}
 ; CHECK: ret void
 
 entry:
@@ -41,8 +41,8 @@ entry:
   %cast1 = bitcast i8* %gep1 to i16*
   store volatile i16 0, i16* %cast1
   %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2
-  %result = load i8* %gep2, align 2
-  store i8 42, i8* %gep2, align 2
+  %result = load i8* %gep2
+  store i8 42, i8* %gep2
   ret void
 }
 
@@ -114,3 +114,89 @@ entry:
 
   ret void
 }
+
+define void @test5() {
+; Test that we preserve underaligned loads and stores when splitting.
+; CHECK: @test5
+; CHECK: alloca [9 x i8]
+; CHECK: alloca [9 x i8]
+; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
+; CHECK: load i16* %{{.*}}, align 1
+; CHECK: load double* %{{.*}}, align 1
+; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
+; CHECK: load i16* %{{.*}}, align 1
+; CHECK: ret void
+
+entry:
+  %a = alloca [18 x i8]
+  %raw1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 0
+  %ptr1 = bitcast i8* %raw1 to double*
+  store volatile double 0.0, double* %ptr1, align 1
+  %weird_gep1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 7
+  %weird_cast1 = bitcast i8* %weird_gep1 to i16*
+  %weird_load1 = load i16* %weird_cast1, align 1
+
+  %raw2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 9
+  %ptr2 = bitcast i8* %raw2 to double*
+  %d1 = load double* %ptr1, align 1
+  store volatile double %d1, double* %ptr2, align 1
+  %weird_gep2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 16
+  %weird_cast2 = bitcast i8* %weird_gep2 to i16*
+  %weird_load2 = load i16* %weird_cast2, align 1
+
+  ret void
+}
+
+define void @test6() {
+; Test that we promote alignment when the underlying alloca switches to one
+; that innately provides it.
+; CHECK: @test6
+; CHECK: alloca double
+; CHECK: alloca double
+; CHECK-NOT: align
+; CHECK: ret void
+
+entry:
+  %a = alloca [16 x i8]
+  %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0
+  %ptr1 = bitcast i8* %raw1 to double*
+  store volatile double 0.0, double* %ptr1, align 1
+
+  %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8
+  %ptr2 = bitcast i8* %raw2 to double*
+  %val = load double* %ptr1, align 1
+  store volatile double %val, double* %ptr2, align 1
+
+  ret void
+}
+
+define void @test7(i8* %out) {
+; Test that we properly compute the destination alignment when rewriting
+; memcpys as direct loads or stores.
+; CHECK: @test7
+; CHECK-NOT: alloca
+
+entry:
+  %a = alloca [16 x i8]
+  %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0
+  %ptr1 = bitcast i8* %raw1 to double*
+  %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8
+  %ptr2 = bitcast i8* %raw2 to double*
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
+; CHECK: %[[val2:.*]] = load double* %{{.*}}, align 1
+; CHECK: %[[val1:.*]] = load double* %{{.*}}, align 1
+
+  %val1 = load double* %ptr2, align 1
+  %val2 = load double* %ptr1, align 1
+
+  store double %val1, double* %ptr1, align 1
+  store double %val2, double* %ptr2, align 1
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i32 0, i1 false)
+; CHECK: store double %[[val1]], double* %{{.*}}, align 1
+; CHECK: store double %[[val2]], double* %{{.*}}, align 1
+
+  ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index e58cef63bad..e7767ef5e96 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
 
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
@@ -862,8 +862,7 @@ define void @PR13916.1() {
 ; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
 ; the case where there is a directly identical value for both source and dest.
 ; CHECK: @PR13916.1
-; FIXME: We shouldn't leave this alloca around.
-; CHECK: alloca
+; CHECK-NOT: alloca
 ; CHECK: ret void
 
 entry:
@@ -878,8 +877,7 @@ define void @PR13916.2() {
 ; different pointer value chains, but during rewriting we coalesce them into the
 ; same value.
 ; CHECK: @PR13916.2
-; FIXME: We shouldn't leave this alloca around.
-; CHECK: alloca
+; CHECK-NOT: alloca
 ; CHECK: ret void
 
 entry:
@@ -897,3 +895,76 @@ if.end:
   %tmp2 = load i8* %gep
   ret void
 }
+
+define void @PR13990() {
+; Ensure we can handle cases where processing one alloca causes the other
+; alloca to become dead and get deleted. This might crash or fail under
+; Valgrind if we regress.
+; CHECK: @PR13990
+; CHECK-NOT: alloca
+; CHECK: unreachable
+; CHECK: unreachable
+
+entry:
+  %tmp1 = alloca i8*
+  %tmp2 = alloca i8*
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  store i8* undef, i8** %tmp2
+  br i1 undef, label %bb2, label %bb3
+
+bb2:
+  %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1
+  br i1 undef, label %bb3, label %bb4
+
+bb3:
+  unreachable
+
+bb4:
+  unreachable
+}
+
+define double @PR13969(double %x) {
+; Check that we detect when promotion will un-escape an alloca and iterate to
+; re-try running SROA over that alloca. Without that, the two allocas that are
+; stored into a dead alloca don't get rewritten and promoted.
+; CHECK: @PR13969
+
+entry:
+  %a = alloca double
+  %b = alloca double*
+  %c = alloca double
+; CHECK-NOT: alloca
+
+  store double %x, double* %a
+  store double* %c, double** %b
+  store double* %a, double** %b
+  store double %x, double* %c
+  %ret = load double* %a
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+  ret double %ret
+; CHECK: ret double %x
+}
+
+%PR14034.struct = type { { {} }, i32, %PR14034.list }
+%PR14034.list = type { %PR14034.list*, %PR14034.list* }
+
+define void @PR14034() {
+; This test case tries to form GEPs into the empty leading struct members, and
+; subsequently crashed (under valgrind) before we fixed the PR. The important
+; thing is to handle empty structs gracefully.
+; CHECK: @PR14034
+
+entry:
+  %a = alloca %PR14034.struct
+  %list = getelementptr %PR14034.struct* %a, i32 0, i32 2
+  %prev = getelementptr %PR14034.list* %list, i32 0, i32 1
+  store %PR14034.list* undef, %PR14034.list** %prev
+  %cast0 = bitcast %PR14034.struct* undef to i8*
+  %cast1 = bitcast %PR14034.struct* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
+  ret void
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
new file mode 100644
index 00000000000..532f8690cf9
--- /dev/null
+++ b/test/Transforms/SROA/big-endian.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define i8 @test1() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca. Note this is
+; the same as test12 in basictest.ll, but here we assert big-endian byte
+; ordering.
+;
+; CHECK: @test1
+
+entry:
+  %a = alloca [3 x i8]
+  %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+  %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+  store i8 0, i8* %a0ptr
+  %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+  store i8 0, i8* %a1ptr
+  %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+  store i8 0, i8* %a2ptr
+  %aiptr = bitcast [3 x i8]* %a to i24*
+  %ai = load i24* %aiptr
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+; CHECK:      %[[mask0:.*]] = and i24 undef, 65535
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
+; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], -256
+
+  %biptr = bitcast [3 x i8]* %b to i24*
+  store i24 %ai, i24* %biptr
+  %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
+  %b0 = load i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
+  %b1 = load i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
+  %b2 = load i8* %b2ptr
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+; CHECK:      %[[shift0:.*]] = lshr i24 %[[mask2]], 16
+; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[mask2]] to i8
+
+  %bsum0 = add i8 %b0, %b1
+  %bsum1 = add i8 %bsum0, %b2
+  ret i8 %bsum1
+; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
+}
+
+define i64 @test2() {
+; Test for various mixed sizes of integer loads and stores all getting
+; promoted.
+;
+; CHECK: @test2
+
+entry:
+  %a = alloca [7 x i8]
+; CHECK-NOT: alloca
+
+  %a0ptr = getelementptr [7 x i8]* %a, i64 0, i32 0
+  %a1ptr = getelementptr [7 x i8]* %a, i64 0, i32 1
+  %a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
+  %a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
+
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+
+  %a0i16ptr = bitcast i8* %a0ptr to i16*
+  store i16 1, i16* %a0i16ptr
+; CHECK:      %[[mask:.*]] = and i56 undef, 1099511627775
+; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+
+  %a1i4ptr = bitcast i8* %a1ptr to i4*
+  store i4 1, i4* %a1i4ptr
+; CHECK:      %[[mask:.*]] = and i56 %[[or]], -16492674416641
+; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+
+  store i8 1, i8* %a2ptr
+; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1095216660481
+; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 4294967296
+
+  %a3i24ptr = bitcast i8* %a3ptr to i24*
+  store i24 1, i24* %a3i24ptr
+; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -4294967041
+; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 256
+
+  %a2i40ptr = bitcast i8* %a2ptr to i40*
+  store i40 1, i40* %a2i40ptr
+; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1099511627776
+; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1
+
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+
+  %aiptr = bitcast [7 x i8]* %a to i56*
+  %ai = load i56* %aiptr
+  %ret = zext i56 %ai to i64
+  ret i64 %ret
+; CHECK:      %[[ret:.*]] = zext i56 %[[or]] to i64
+; CHECK-NEXT: ret i64 %[[ret]]
+}
diff --git a/test/Transforms/SROA/fca.ll b/test/Transforms/SROA/fca.ll
index 6ddaed2f30c..c30a5cc974f 100644
--- a/test/Transforms/SROA/fca.ll
+++ b/test/Transforms/SROA/fca.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define { i32, i32 } @test0(i32 %x, i32 %y) {
 ; CHECK: @test0
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 6f5833a772f..2b0724c7fd4 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define i32 @test1() {
 ; CHECK: @test1
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 9cbab385c21..80757475a5d 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 %S1 = type { i64, [42 x float] }
 
diff --git a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
index 65d888ea01e..028fb074563 100644
--- a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
+++ b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
@@ -85,3 +85,31 @@ if.end7:                                          ; preds = %if.else, %if.then4,
 ; CHECK: if.end7:
 ; CHECK: phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
 }
+
+define i32 @test4(i32* %a, i32 %b, i32* %c, i32 %d) nounwind {
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @bar() nounwind
+  br label %if.end7
+
+if.else:                                          ; preds = %entry
+  %tobool3 = icmp eq i32 %d, 0
+  br i1 %tobool3, label %if.end7, label %if.then4
+
+if.then4:                                         ; preds = %if.else
+  tail call void @bar() nounwind
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.else, %if.then4, %if.then
+  %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
+  %gep = getelementptr i32* %x.0, i32 10
+  %tmp9 = load i32* %gep
+  %tmp10 = or i32 %tmp9, 1
+  store i32 %tmp10, i32* %gep
+  ret i32 %tmp9
+; CHECK: @test4
+; CHECK-NOT: phi
+}