summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-04-15 22:00:26 +0000
committerNadav Rotem <nrotem@apple.com>2013-04-15 22:00:26 +0000
commite9a4411db4d3a05965630f668daf8071bf2d3513 (patch)
tree910f2e83862c8fe7ce21f401a16d00e9ebaf6ffd /lib/Transforms/Vectorize
parentc9363ef67a5716a282bd7b4810c61da522bb7fd9 (diff)
SLPVectorizer: Make it a function pass and add code for hoisting the vector-gather sequence out of loops.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179562 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp367
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp9
-rw-r--r--lib/Transforms/Vectorize/VecUtils.h37
3 files changed, 254 insertions, 159 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ea33801fd21..6d4c36aacdc 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -45,13 +46,13 @@ SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
namespace {
/// The SLPVectorizer Pass.
-struct SLPVectorizer : public BasicBlockPass {
+struct SLPVectorizer : public FunctionPass {
typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
/// Pass identification, replacement for typeid
static char ID;
- explicit SLPVectorizer() : BasicBlockPass(ID) {
+ explicit SLPVectorizer() : FunctionPass(ID) {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
@@ -59,182 +60,256 @@ struct SLPVectorizer : public BasicBlockPass {
DataLayout *DL;
TargetTransformInfo *TTI;
AliasAnalysis *AA;
+ LoopInfo *LI;
+
+ virtual bool runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+
+ StoreRefs.clear();
+ bool Changed = false;
+
+ // Must have DataLayout. We can't require it because some tests run w/o
+ // triple.
+ if (!DL)
+ return false;
+
+ for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+ BasicBlock *BB = it;
+ bool BBChanged = false;
+
+ // Use the bollom up slp vectorizer to construct chains that start with
+ // he store instructions.
+ BoUpSLP R(BB, SE, DL, TTI, AA);
+
+ // Vectorize trees that end at reductions.
+ BBChanged |= vectorizeReductions(BB, R);
+
+ // Vectorize trees that end at stores.
+ if (collectStores(BB, R)) {
+ DEBUG(dbgs()<<"SLP: Found stores to vectorize.\n");
+ BBChanged |= vectorizeStoreChains(R);
+ }
+
+ // Try to hoist some of the scalarization code to the preheader.
+ if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+ Changed |= BBChanged;
+ }
+
+ if (Changed) {
+ DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+ DEBUG(verifyFunction(F));
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<LoopInfo>();
+ }
+
+private:
/// \brief Collect memory references and sort them according to their base
/// object. We sort the stores to their base objects to reduce the cost of the
/// quadratic search on the stores. TODO: We can further reduce this cost
/// if we flush the chain creation every time we run into a memory barrier.
- bool collectStores(BasicBlock *BB, BoUpSLP &R) {
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- StoreInst *SI = dyn_cast<StoreInst>(it);
- if (!SI)
- continue;
+ bool collectStores(BasicBlock *BB, BoUpSLP &R);
- // Check that the pointer points to scalars.
- if (SI->getValueOperand()->getType()->isAggregateType())
- return false;
+ /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
- // Find the base of the GEP.
- Value *Ptr = SI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEP->getPointerOperand();
+ /// \brief Try to vectorize a chain that may start at the operands of \V;
+ bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
- // Save the store locations.
- StoreRefs[Ptr].push_back(SI);
- }
- return true;
+ /// \brief Vectorize the stores that were collected in StoreRefs.
+ bool vectorizeStoreChains(BoUpSLP &R);
+
+ /// \brief Try to hoist gather sequences outside of the loop in cases where
+ /// all of the sources are loop invariant.
+ void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Scan the basic block and look for reductions that may start a
+ /// vectorization chain.
+ bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+ StoreListMap StoreRefs;
+};
+
+bool SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+ StoreRefs.clear();
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ if (SI->getValueOperand()->getType()->isAggregateType())
+ return false;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
}
+ return true;
+}
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+ if (!A || !B) return false;
+ BoUpSLP::ValueList VL;
+ VL.push_back(A);
+ VL.push_back(B);
+ int Cost = R.getTreeCost(VL);
+ int ExtrCost = R.getScalarizationCost(VL);
+ DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+ " Cost of extract:" << ExtrCost << ".\n");
+ if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+ DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+ R.vectorizeArith(VL);
+ return true;
+}
- bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
- if (!A || !B) return false;
- BoUpSLP::ValueList VL;
- VL.push_back(A);
- VL.push_back(B);
- int Cost = R.getTreeCost(VL);
- int ExtrCost = R.getScalarizationCost(VL);
- DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
- " Cost of extract:" << ExtrCost << ".\n");
- if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
- DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
- R.vectorizeArith(VL);
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+ if (!V) return false;
+ // Try to vectorize V.
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
return true;
- }
- bool tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
- if (!V) return false;
- // Try to vectorize V.
- if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ // Try to skip B.
+ if (B && B->hasOneUse()) {
+ BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (tryToVectorizePair(A, B0, R)) {
+ B->moveBefore(V);
return true;
-
- BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
- BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
- // Try to skip B.
- if (B && B->hasOneUse()) {
- BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
- BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
- if (tryToVectorizePair(A, B0, R)) {
- B->moveBefore(V);
- return true;
- }
- if (tryToVectorizePair(A, B1, R)) {
- B->moveBefore(V);
- return true;
- }
}
-
- // Try to slip A.
- if (A && A->hasOneUse()) {
- BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
- BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
- if (tryToVectorizePair(A0, B, R)) {
- A->moveBefore(V);
- return true;
- }
- if (tryToVectorizePair(A1, B, R)) {
- A->moveBefore(V);
- return true;
- }
+ if (tryToVectorizePair(A, B1, R)) {
+ B->moveBefore(V);
+ return true;
}
- return 0;
}
- bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
- bool Changed = false;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- if (isa<DbgInfoIntrinsic>(it)) continue;
-
- // Try to vectorize reductions that use PHINodes.
- if (PHINode *P = dyn_cast<PHINode>(it)) {
- // Check that the PHI is a reduction PHI.
- if (P->getNumIncomingValues() != 2) return Changed;
- Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
- (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
- 0));
- // Check if this is a Binary Operator.
- BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
- if (!BI)
- continue;
-
- Value *Inst = BI->getOperand(0);
- if (Inst == P) Inst = BI->getOperand(1);
- Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
- continue;
- }
-
- // Try to vectorize trees that start at compare instructions.
- if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
- if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
- Changed |= true;
- continue;
- }
- for (int i = 0; i < 2; ++i)
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
- Changed |= tryToVectorize(BI, R);
- continue;
- }
+ // Try to slip A.
+ if (A && A->hasOneUse()) {
+ BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (tryToVectorizePair(A0, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A1, B, R)) {
+ A->moveBefore(V);
+ return true;
}
-
- return Changed;
}
+ return 0;
+}
- bool vectorizeStoreChains(BoUpSLP &R) {
- bool Changed = false;
- // Attempt to sort and vectorize each of the store-groups.
- for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
- it != e; ++it) {
- if (it->second.size() < 2)
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+ bool Changed = false;
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ if (isa<DbgInfoIntrinsic>(it)) continue;
+
+ // Try to vectorize reductions that use PHINodes.
+ if (PHINode *P = dyn_cast<PHINode>(it)) {
+ // Check that the PHI is a reduction PHI.
+ if (P->getNumIncomingValues() != 2) return Changed;
+ Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+ (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+ 0));
+ // Check if this is a Binary Operator.
+ BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+ if (!BI)
continue;
- DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
- it->second.size() << ".\n");
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P) Inst = BI->getOperand(1);
+ Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ continue;
+ }
- Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ // Try to vectorize trees that start at compare instructions.
+ if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+ if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+ Changed |= true;
+ continue;
+ }
+ for (int i = 0; i < 2; ++i)
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+ Changed |= tryToVectorize(BI, R);
+ continue;
}
- return Changed;
}
- virtual bool runOnBasicBlock(BasicBlock &BB) {
- SE = &getAnalysis<ScalarEvolution>();
- DL = getAnalysisIfAvailable<DataLayout>();
- TTI = &getAnalysis<TargetTransformInfo>();
- AA = &getAnalysis<AliasAnalysis>();
- StoreRefs.clear();
-
- // Must have DataLayout. We can't require it because some tests run w/o
- // triple.
- if (!DL)
- return false;
-
- // Use the bollom up slp vectorizer to construct chains that start with
- // he store instructions.
- BoUpSLP R(&BB, SE, DL, TTI, AA);
+ return Changed;
+}
- // Vectorize trees that end at reductions.
- bool Changed = vectorizeReductions(&BB, R);
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
- // Vectorize trees that end at stores.
- if (collectStores(&BB, R)) {
- DEBUG(dbgs()<<"SLP: Found stores to vectorize.\n");
- Changed |= vectorizeStoreChains(R);
- }
+ DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+ it->second.size() << ".\n");
- if (Changed) {
- DEBUG(dbgs()<<"SLP: vectorized \""<<BB.getParent()->getName()<<"\"\n");
- DEBUG(verifyFunction(*BB.getParent()));
- }
- return Changed;
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
}
+ return Changed;
+}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- BasicBlockPass::getAnalysisUsage(AU);
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetTransformInfo>();
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+ BoUpSLP &R) {
+ // Check if this block is inside a loop.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ // Check if it has a preheader.
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ if (!PreHeader)
+ return;
+
+ // Mark the insertion point for the block.
+ Instruction *Location = PreHeader->getTerminator();
+
+ BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+ for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+ it != e; ++it) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+ // The InsertElement sequence can be simplified into a constant.
+ if (!Insert)
+ continue;
+
+ // If the vector or the element that we insert into it are
+ // instructions that are defined in this basic block then we can't
+ // hoist this instruction.
+ Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+ Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (CurrVec && L->contains(CurrVec)) continue;
+ if (NewElem && L->contains(NewElem)) continue;
+
+ // We can hoist this instruction. Move it to the pre-header.
+ Insert->moveBefore(Location);
}
-
-private:
- StoreListMap StoreRefs;
-};
+}
} // end anonymous namespace
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index c85711532dd..a69646336c8 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -511,8 +511,15 @@ Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
Value *Vec = UndefValue::get(Ty);
- for (unsigned i=0; i < Ty->getNumElements(); ++i)
+ for (unsigned i=0; i < Ty->getNumElements(); ++i) {
+ // Generate the 'InsertElement' instruction.
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ // Remember that this instruction is used as part of a 'gather' sequence.
+ // The caller of the bottom-up slp vectorizer can try to hoist the sequence
+ // if the users are outside of the basic block.
+ GatherInstructions.push_back(Vec);
+ }
+
return Vec;
}
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
index 03512bf8c34..fed5178b802 100644
--- a/lib/Transforms/Vectorize/VecUtils.h
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -71,6 +71,11 @@ struct BoUpSLP {
/// \brief Vectorize a group of scalars into a vector tree.
void vectorizeArith(ValueList &Operands);
+ /// \returns the list of new instructions that were added in order to collect
+ /// scalars into vectors. This list can be used to further optimize the gather
+ /// sequences.
+ ValueList &getGatherSeqInstructions() {return GatherInstructions; }
+
private:
/// \brief This method contains the recursive part of getTreeCost.
int getTreeCost_rec(ValueList &VL, unsigned Depth);
@@ -107,11 +112,11 @@ private:
/// \returns a vector from a collection of scalars in \p VL.
Value *Scalarize(ValueList &VL, VectorType *Ty);
-
+
private:
- // Maps instructions to numbers and back.
+ /// Maps instructions to numbers and back.
SmallDenseMap<Value*, int> InstrIdx;
- // Maps integers to Instructions.
+ /// Maps integers to Instructions.
std::vector<Instruction*> InstrVec;
// -- containers that are used during getTreeCost -- //
@@ -121,21 +126,29 @@ private:
/// NOTICE: The vectorization methods also use this set.
ValueSet MustScalarize;
- // Contains a list of values that are used outside the current tree. This
- // set must be reset between runs.
+ /// Contains a list of values that are used outside the current tree. This
+ /// set must be reset between runs.
ValueSet MultiUserVals;
- // Maps values in the tree to the vector lanes that uses them. This map must
- // be reset between runs of getCost.
+ /// Maps values in the tree to the vector lanes that uses them. This map must
+ /// be reset between runs of getCost.
std::map<Value*, int> LaneMap;
- // A list of instructions to ignore while sinking
- // memory instructions. This map must be reset between runs of getCost.
+ /// A list of instructions to ignore while sinking
+ /// memory instructions. This map must be reset between runs of getCost.
SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
- // -- containers that are used during vectorizeTree -- //
- // Maps between the first scalar to the vector. This map must be reset between
- // runs.
+ // -- Containers that are used during vectorizeTree -- //
+
+ /// Maps between the first scalar to the vector. This map must be reset
+ ///between runs.
DenseMap<Value*, Value*> VectorizedValues;
+ // -- Containers that are used after vectorization by the caller -- //
+
+ /// A list of instructions that are used when gathering scalars into vectors.
+ /// In many cases these instructions can be hoisted outside of the BB.
+ /// Iterating over this list is faster than calling LICM.
+ ValueList GatherInstructions;
+
// Analysis and block reference.
BasicBlock *BB;
ScalarEvolution *SE;