diff options
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 13 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/runtime-loop1.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/tripcount-overflow.ll | 30 |
3 files changed, 42 insertions, 3 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 4241fcaa880..3d9133684db 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -295,6 +295,10 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) return false; + // If BECount is all-ones (UINT_MAX), trip-count (BECount + 1) would overflow. + if (BECount->isAllOnesValue()) + return false; + // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); @@ -357,11 +361,16 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; + // If unroll count is 2 and we can't overflow in tripcount computation (which + // is BECount + 1), then we don't need a loop for prologue, and we can unroll + // it. We can be sure that we don't overflow only if tripcount is a constant. + bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount)); + // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. 
- CloneLoopBlocks(L, ModVal, Count == 2, PH, PEnd, NewBlocks, LoopBlocks, VMap, - LI); + CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks, + VMap, LI); // Insert the cloned blocks into function just before the original loop F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index 5ff75e33f7f..38b4f32354a 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -3,7 +3,7 @@ ; This tests that setting the unroll count works ; CHECK: for.body.prol: -; CHECK: br label %for.body.preheader.split +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split ; CHECK: for.body: ; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll new file mode 100644 index 00000000000..d59368578ec --- /dev/null +++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; When the prologue is fully unrolled, the branch at its end is unconditional. +; Unrolling it is illegal if we can't prove that backedge-count+1 doesn't overflow, +; like in this example, where the count comes from an argument. 
+; +; This test is based on an example from here: +; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out +; +; CHECK: while.body.prol: +; CHECK: br i1 +; CHECK: entry.split: + +; Function Attrs: nounwind readnone ssp uwtable +define i32 @foo(i32 %N) #0 { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %cmp = icmp eq i32 %i, %N + %inc = add i32 %i, 1 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %while.body + ret i32 %i +} + +attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } |