lib/Transforms/Scalar/LoopStrengthReduce.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313

//===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
// 
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Nate Begeman and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
// 
//===----------------------------------------------------------------------===//
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable.  This is
// accomplished by creating a new Value to hold the initial value of the array
// access for the first iteration, and then creating a new GEP instruction in
// the loop to increment the value by the appropriate amount.
//
// There are currently several deficiencies in the implementation, marked with
// FIXME in the code.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;

namespace {
  Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced");

  class GEPCache {
  public:
    GEPCache() : CachedPHINode(0), Map() {}

    GEPCache *get(Value *v) {
      std::map<Value *, GEPCache>::iterator I = Map.find(v);
      if (I == Map.end())
        I = Map.insert(std::pair<Value *, GEPCache>(v, GEPCache())).first;
      return &I->second;
    }

    PHINode *CachedPHINode;
    std::map<Value *, GEPCache> Map;
  };

  class LoopStrengthReduce : public FunctionPass {
    LoopInfo *LI;
    DominatorSet *DS;
    bool Changed;
    unsigned MaxTargetAMSize;
  public:
    LoopStrengthReduce(unsigned MTAMS = 1)
      : MaxTargetAMSize(MTAMS) {
    }

    virtual bool runOnFunction(Function &) {
      LI = &getAnalysis<LoopInfo>();
      DS = &getAnalysis<DominatorSet>();
      Changed = false;

      for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
        runOnLoop(*I);
      return Changed;
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequiredID(LoopSimplifyID);
      AU.addRequired<LoopInfo>();
      AU.addRequired<DominatorSet>();
      AU.addRequired<TargetData>();
    }
  private:
    void runOnLoop(Loop *L);
    void strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
                           GEPCache* GEPCache,
                           Instruction *InsertBefore,
                           std::set<Instruction*> &DeadInsts);
    void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
  };
  RegisterOpt<LoopStrengthReduce> X("loop-reduce", 
                                    "Strength Reduce GEP Uses of Ind. Vars");
}

FunctionPass *llvm::createLoopStrengthReducePass(unsigned MaxTargetAMSize) {
  return new LoopStrengthReduce(MaxTargetAMSize);
}

/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::
DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
  while (!Insts.empty()) {
    Instruction *I = *Insts.begin();
    Insts.erase(Insts.begin());
    if (isInstructionTriviallyDead(I)) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
          Insts.insert(U);
      I->getParent()->getInstList().erase(I);
      Changed = true;
    }
  }
}

void LoopStrengthReduce::strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
                                           GEPCache *Cache,
                                           Instruction *InsertBefore,
                                           std::set<Instruction*> &DeadInsts) {
  // We will strength reduce the GEP by splitting it into two parts.  The first
  // is a GEP to hold the initial value of the non-strength-reduced GEP upon
  // entering the loop, which we will insert at the end of the loop preheader.
  // The second is a GEP to hold the incremented value of the initial GEP.
  // The LoopIndVarSimplify pass guarantees that loop counts start at zero, so
  // we will replace the indvar with a constant zero value to create the first
  // GEP.
  //
  // We currently only handle GEP instructions that consist of zero or more
  // constants or loop invariable expressions prior to an instance of the
  // canonical induction variable.
  unsigned indvar = 0;
  std::vector<Value *> pre_op_vector;
  std::vector<Value *> inc_op_vector;
  const Type *ty = GEPI->getOperand(0)->getType();
  Value *CanonicalIndVar = L->getCanonicalInductionVariable();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Preheader = L->getLoopPreheader();
  bool AllConstantOperands = true;
  Cache = Cache->get(GEPI->getOperand(0));

  for (unsigned op = 1, e = GEPI->getNumOperands(); op != e; ++op) {
    Value *operand = GEPI->getOperand(op);
    if (ty->getTypeID() == Type::StructTyID) {
      assert(isa<ConstantUInt>(operand));
      ConstantUInt *c = dyn_cast<ConstantUInt>(operand);
      ty = ty->getContainedType(unsigned(c->getValue()));
    } else {
      ty = ty->getContainedType(0);
    }

    if (operand == CanonicalIndVar) {
      // FIXME: use getCanonicalInductionVariableIncrement to choose between
      // one and neg one maybe?  We need to support int *foo = GEP base, -1
      const Type *Ty = CanonicalIndVar->getType();
      pre_op_vector.push_back(Constant::getNullValue(Ty));
      inc_op_vector.push_back(ConstantInt::get(Ty, 1));
      indvar = op;
      break;
    } else if (isa<Constant>(operand) || isa<Argument>(operand)) {
      pre_op_vector.push_back(operand);
    } else if (Instruction *inst = dyn_cast<Instruction>(operand)) {
      if (!DS->dominates(inst, Preheader->getTerminator()))
        return;
      pre_op_vector.push_back(operand);
      AllConstantOperands = false;
    } else
      return;
    Cache = Cache->get(operand);
  }
  assert(indvar > 0 && "Indvar used by GEP not found in operand list");
  
  // Ensure the pointer base is loop invariant.  While strength reduction
  // makes sense even if the pointer changed on every iteration, there is no
  // realistic way of handling it unless GEPs were completely decomposed into
  // their constituent operations so we have explicit multiplications to work
  // with.
  if (Instruction *GepPtrOp = dyn_cast<Instruction>(GEPI->getOperand(0)))
    if (!DS->dominates(GepPtrOp, Preheader->getTerminator()))
      return;

  // Don't reduce multiplies that the target can handle via addressing modes.
  uint64_t sz = getAnalysis<TargetData>().getTypeSize(ty);
  if (sz && (sz & (sz-1)) == 0)   // Power of two?
    if (sz <= (1ULL << (MaxTargetAMSize-1)))
      return;
  
  // If all operands of the GEP we are going to insert into the preheader
  // are constants, generate a GEP ConstantExpr instead. 
  //
  // If there is only one operand after the initial non-constant one, we know
  // that it was the induction variable, and has been replaced by a constant
  // null value.  In this case, replace the GEP with a use of pointer directly.
  PHINode *NewPHI;
  if (1) {
    Value *PreGEP;
    if (AllConstantOperands && isa<Constant>(GEPI->getOperand(0))) {
      Constant *C = dyn_cast<Constant>(GEPI->getOperand(0));
      PreGEP = ConstantExpr::getGetElementPtr(C, pre_op_vector);
    } else if (pre_op_vector.size() == 1) {
      PreGEP = GEPI->getOperand(0);
    } else {
      PreGEP = new GetElementPtrInst(GEPI->getOperand(0),
                                    pre_op_vector, GEPI->getName()+".pre", 
                                    Preheader->getTerminator());
    }

    // The next step of the strength reduction is to create a PHI that will
    // choose between the initial GEP we created and inserted into the
    // preheader, and the incremented GEP that we will create below and insert
    // into the loop body.
    NewPHI = new PHINode(PreGEP->getType(), 
                                  GEPI->getName()+".str", InsertBefore);
    NewPHI->addIncoming(PreGEP, Preheader);
    
    // Now, create the GEP instruction to increment by one the value selected
    // by the PHI instruction we just created above, and add it as the second
    // incoming Value/BasicBlock pair to the PHINode.  It is inserted before
    // the increment of the canonical induction variable.
    Instruction *IncrInst = 
      const_cast<Instruction*>(L->getCanonicalInductionVariableIncrement());
    GetElementPtrInst *StrGEP = new GetElementPtrInst(NewPHI, inc_op_vector,
                                                      GEPI->getName()+".inc",
                                                      IncrInst);
    pred_iterator PI = pred_begin(Header);
    if (*PI == Preheader)
      ++PI;
    NewPHI->addIncoming(StrGEP, *PI);
    Cache->CachedPHINode = NewPHI;
  } else {
    // Reuse previously created pointer, as it is identical to the one we were
    // about to create.
    NewPHI = Cache->CachedPHINode;
  }
  
  if (GEPI->getNumOperands() - 1 == indvar) {
    // If there were no operands following the induction variable, replace all
    // uses of the old GEP instruction with the new PHI.
    GEPI->replaceAllUsesWith(NewPHI);
  } else {
    // Create a new GEP instruction using the new PHI as the base.  The
    // operands of the original GEP past the induction variable become
    // operands of this new GEP.
    std::vector<Value *> op_vector;
    const Type *Ty = CanonicalIndVar->getType();
    op_vector.push_back(Constant::getNullValue(Ty));
    for (unsigned op = indvar + 1; op < GEPI->getNumOperands(); op++)
      op_vector.push_back(GEPI->getOperand(op));
    GetElementPtrInst *newGEP = new GetElementPtrInst(NewPHI, op_vector,
                                                      GEPI->getName() + ".lsr",
                                                      GEPI);
    GEPI->replaceAllUsesWith(newGEP);
  }
  
  // The old GEP is now dead.
  DeadInsts.insert(GEPI);
  ++NumReduced;
}

void LoopStrengthReduce::runOnLoop(Loop *L) {
  // First step, transform all loops nesting inside of this loop.
  for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
    runOnLoop(*I);

  // Next, get the first PHINode since it is guaranteed to be the canonical
  // induction variable for the loop by the preceding IndVarSimplify pass.
  PHINode *PN = L->getCanonicalInductionVariable();
  if (0 == PN)
    return;

  // FIXME: Need to use SCEV to detect GEP uses of the indvar, since indvars
  // pass creates code like this, which we can't currently detect:
  //  %tmp.1 = sub uint 2000, %indvar
  //  %tmp.8 = getelementptr int* %y, uint %tmp.1
  
  // Strength reduce all GEPs in the Loop.  Insert secondary PHI nodes for the
  // strength reduced pointers we'll be creating after the canonical induction
  // variable's PHI.
  std::set<Instruction*> DeadInsts;
  GEPCache Cache;
  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
       UI != UE; ++UI)
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
      strengthReduceGEP(GEPI, L, &Cache, PN->getNext(), DeadInsts);

  // Clean up after ourselves
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    // At this point, we know that we have killed one or more GEP instructions.
    // It is worth checking to see if the cann indvar is also dead, so that we
    // can remove it as well.  The requirements for the cann indvar to be
    // considered dead are:
    // 1. the cann indvar has one use
    // 2. the use is an add instruction
    // 3. the add has one use
    // 4. the add is used by the cann indvar
    // If all four cases above are true, then we can remove both the add and
    // the cann indvar.
    // FIXME: this needs to eliminate an induction variable even if it's being
    // compared against some value to decide loop termination.
    if (PN->hasOneUse()) {
      BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
      if (BO && BO->getOpcode() == Instruction::Add)
        if (BO->hasOneUse()) {
          if (PN == dyn_cast<PHINode>(*(BO->use_begin()))) {
            DeadInsts.insert(BO);
            // Break the cycle, then delete the PHI.
            PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
            PN->eraseFromParent();
            DeleteTriviallyDeadInstructions(DeadInsts);
          }
        }
    }
  }
}