ARM cost model: Address computation in vector mem ops not free

Adds a function to target transform info to query for the cost of address computation. The cost model analysis pass now also queries this interface. The code in LoopVectorize adds the cost of address computation as part of the memory instruction cost calculation. Only there, we know whether the instruction will be scalarized or not. Increase the penality for inserting in to D registers on swift. This becomes necessary because we now always assume that address computation has a cost and three is a closer value to the architecture. radar://13097204 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174713 91177308-0d34-0410-b5e6-96231b3b80d8
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-08 14:50:48 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-08 14:50:48 +0000
commit: fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d (patch)
tree: 6143816aee69c2cdf8ac4bd2414689216941b036 /lib/Transforms/Vectorize
parent: f64edf8d802824202b50046638696a6b7897d4d6 (diff)
1 files changed, 14 insertions, 8 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 91d565976ad..f12b0bf0f39 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3056,9 +3056,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
-    // We mark this instruction as zero-cost because scalar GEPs are usually
-    // lowered to the intruction addressing mode. At the moment we don't
-    // generate vector geps.
+    // We mark this instruction as zero-cost because the cost of GEPs in
+    // vectorized code depends on whether the corresponding memory instruction
+    // is scalarized or not. Therefore, we handle GEPs with the memory
+    // instruction cost.
     return 0;
   case Instruction::Br: {
     return TTI.getCFInstrCost(I->getOpcode());
@@ -3113,9 +3114,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     unsigned AS = SI ? SI->getPointerAddressSpace() :
       LI->getPointerAddressSpace();
     Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
-
+    // We add the cost of address computation here instead of with the gep
+    // instruction because only here we know whether the operation is
+    // scalarized.
     if (VF == 1)
-      return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+      return TTI.getAddressComputationCost(VectorTy) +
+        TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
 
     // Scalarized loads/stores.
     int Stride = Legal->isConsecutivePtr(Ptr);
@@ -3135,15 +3139,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
                                             VectorTy, i);
       }
 
-      // The cost of the scalar stores.
+      // The cost of the scalar loads/stores.
+      Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
       Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                        Alignment, AS);
       return Cost;
     }
 
     // Wide load/stores.
-    unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                        Alignment, AS);
+    unsigned Cost = TTI.getAddressComputationCost(VectorTy);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
     if (Reverse)
       Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                                   VectorTy, 0);
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-08 14:50:48 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-08 14:50:48 +0000
commit	fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d (patch)
tree	6143816aee69c2cdf8ac4bd2414689216941b036 /lib/Transforms/Vectorize
parent	f64edf8d802824202b50046638696a6b7897d4d6 (diff)