summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2016-02-15 17:19:33 -0500
committerNicolai Hähnle <nicolai.haehnle@amd.com>2016-04-06 13:30:25 -0500
commit5f553c71f94a651a58a3df7d96f355456bad0ad6 (patch)
tree7518c5119745e6db7b29d04d6af074c17c2351e1
parent036a6e8ef008ddd8f9c5000442ce86116333fa35 (diff)
[Sink] Don't move calls to readonly functions across stores
Summary: This is conservative because it doesn't consider the argmemonly attribute. Perhaps somebody who knows this part of the code better can improve this in a separate patch. For now, it fixes a real miscompilation I encountered in the AMDGPU backend. Reviewers: hfinkel, majnemer, tstellarAMD, sunfish Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D17279
-rw-r--r--lib/Transforms/Scalar/Sink.cpp7
-rw-r--r--test/Transforms/Sink/call.ll68
2 files changed, 73 insertions, 2 deletions
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index eb09f584dc6..c4bf2be1906 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -175,11 +175,14 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
Inst->mayThrow())
return false;
- // Convergent operations cannot be made control-dependent on additional
- // values.
if (auto CS = CallSite(Inst)) {
+ // Convergent operations cannot be made control-dependent on additional
+ // values.
if (CS.hasFnAttr(Attribute::Convergent))
return false;
+
+ if (Inst->mayReadFromMemory() && !Stores.empty())
+ return false;
}
return true;
diff --git a/test/Transforms/Sink/call.ll b/test/Transforms/Sink/call.ll
new file mode 100644
index 00000000000..263b88ca153
--- /dev/null
+++ b/test/Transforms/Sink/call.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -basicaa -sink -S | FileCheck %s
+
+declare i32 @f_load_global() nounwind readonly
+declare void @f_store_global(i32) nounwind
+declare i32 @f_readnone(i32) nounwind readnone
+
+@A = external global i32
+
+; Sink readonly call if no stores are in the way.
+;
+; CHECK-LABEL: @test1(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_load_global
+; CHECK-NEXT: ret i32 %l
+define i32 @test1(i1 %z) {
+ %l = call i32 @f_load_global()
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; But don't sink if there is a store ...
+;
+; CHECK-LABEL: @test2(
+; CHECK: call i32 @f_load_global
+; CHECK-NEXT: store i32
+define i32 @test2(i1 %z) {
+ %l = call i32 @f_load_global()
+ store i32 0, i32* @A
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; ... or a non-readonly call
+;
+; CHECK-LABEL: @test3(
+; CHECK: call i32 @f_load_global
+; CHECK-NEXT: call void @f_store_global
+define i32 @test3(i1 %z) {
+ %l = call i32 @f_load_global()
+ call void @f_store_global(i32 0)
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; readnone calls are sunk across stores.
+;
+; CHECK-LABEL: @test4(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_readnone(
+; CHECK-NEXT: ret i32 %l
+define i32 @test4(i1 %z) {
+ %l = call i32 @f_readnone(i32 0)
+ store i32 0, i32* @A
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}