diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-03-05 17:02:17 +0100 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-05-04 20:01:19 +0200 |
commit | 2aaee8abbe78d2ca17926d1ee13e0d17b2998a53 (patch) | |
tree | 6f9bdbdb6155f41c6ea1e77cb789e6a661971439 /lib/Target/R600/R600MachineScheduler.cpp | |
parent | 765671ca109bd463123eff5a5e7db8ab6eed971e (diff) |
R600: Use bottom up scheduling algorithm
Diffstat (limited to 'lib/Target/R600/R600MachineScheduler.cpp')
-rw-r--r-- | lib/Target/R600/R600MachineScheduler.cpp | 185 |
1 files changed, 91 insertions, 94 deletions
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index c6709a8dc3..699497395b 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -25,48 +25,53 @@ using namespace llvm; +cl::opt<bool> TopDown("r600-topdown", cl::Hidden, + cl::desc("Switch top-down scheduling"), cl::init(false)); + + void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { DAG = dag; TII = static_cast<const R600InstrInfo*>(DAG->TII); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); MRI = &DAG->MRI; - Available[IDAlu]->clear(); - Available[IDFetch]->clear(); - Available[IDOther]->clear(); CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); - + InstKindLimit[IDOther] = 32; const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); InstKindLimit[IDFetch] = ST.getTexVTXClauseSize(); } -void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) +void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, std::vector<SUnit *> &QDst) { - if (QSrc->empty()) + if (QSrc.empty()) return; - for (ReadyQueue::iterator I = QSrc->begin(), - E = QSrc->end(); I != E; ++I) { - (*I)->NodeQueueId &= ~QSrc->getID(); - QDst->push(*I); + for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { + QDst.insert(QDst.begin(), QSrc[i]); } - QSrc->clear(); + QSrc.clear(); } SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { SUnit *SU = 0; - IsTopNode = true; NextInstKind = IDOther; + if (!FakeCopy.empty()) { + IsTopNode = true; + SU = *FakeCopy.begin(); + FakeCopy.erase(FakeCopy.begin()); + return SU; + } + + IsTopNode = TopDown; + // check if we might want to switch current clause type - bool AllowSwitchToAlu = (CurInstKind == IDOther) || - (CurEmitted >= InstKindLimit[CurInstKind]) || - (Available[CurInstKind]->empty()); - bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && - (!Available[IDFetch]->empty() || !Available[IDOther]->empty()); + bool AllowSwitchToAlu = (Available[CurInstKind].empty()); + bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) && + (!Available[IDFetch].empty() || !Available[IDOther].empty()); if ((AllowSwitchToAlu && CurInstKind != IDAlu) || (!AllowSwitchFromAlu && CurInstKind == IDAlu)) { @@ -95,14 +100,10 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { DEBUG( if (SU) { - dbgs() << "picked node: "; + dbgs() << " ** Pick node **\n"; SU->dump(DAG); } else { - dbgs() << "NO NODE "; - for (int i = 0; i < IDLast; ++i) { - Available[i]->dump(); - Pending[i]->dump(); - } + dbgs() << "NO NODE \n"; for (unsigned i = 0; i < DAG->SUnits.size(); i++) { const SUnit &S = DAG->SUnits[i]; if (!S.isScheduled) @@ -116,9 +117,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { - DEBUG(dbgs() << "scheduled: "); - DEBUG(SU->dump(DAG)); - if (NextInstKind != CurInstKind) { DEBUG(dbgs() << "Instruction Type Switch\n"); if (NextInstKind != IDAlu) @@ -151,22 +149,58 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); - if (CurInstKind != IDFetch) { + if (CurInstKind != IDFetch) MoveUnits(Pending[IDFetch], Available[IDFetch]); +} + +static bool +isFakeCopy(MachineInstr *MI) { + if (MI->getOpcode() != AMDGPU::COPY) + return false; + if (!TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) + return false; + switch (MI->getOperand(1).getReg()) { + case AMDGPU::ZERO: + case AMDGPU::HALF: + case AMDGPU::NEG_HALF: + case AMDGPU::ONE: + case AMDGPU::NEG_ONE: + case AMDGPU::ONE_INT: + return false; + default: + return true; } - MoveUnits(Pending[IDOther], Available[IDOther]); } void R600SchedStrategy::releaseTopNode(SUnit *SU) { - int IK = getInstKind(SU); - - DEBUG(dbgs() << IK << " <= "); - DEBUG(SU->dump(DAG)); + DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG);); + if (isFakeCopy(SU->getInstr())) { + FakeCopy.push_back(SU); + return; + } - Pending[IK]->push(SU); + if (TopDown) { + int IK = getInstKind(SU); + if (IK == IDOther) + Available[IDOther].push_back(SU); + else + Pending[IK].push_back(SU); + } } void R600SchedStrategy::releaseBottomNode(SUnit *SU) { + DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); + if (isFakeCopy(SU->getInstr())) { + return; + } + if (!TopDown) { + int IK = getInstKind(SU); + + if (IK == IDOther) + Available[IDOther].push_back(SU); + else + Pending[IK].push_back(SU); + } } bool R600SchedStrategy::regBelongsToClass(unsigned Reg, @@ -183,16 +217,12 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { switch (MI->getOpcode()) { case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::PRED_X: case AMDGPU::INTERP_PAIR_ZW: case AMDGPU::INTERP_VEC_LOAD: return AluT_XYZW; case AMDGPU::COPY: - if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) { - // %vregX = COPY Tn_X is likely to be discarded in favor of an - // assignement of Tn_X to %vregX, don't considers it in scheduling - return AluDiscarded; - } - else if (MI->getOperand(1).isUndef()) { + if (MI->getOperand(1).isUndef()) { // MI will become a KILL, don't considers it in scheduling return AluDiscarded; } @@ -242,6 +272,9 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { int R600SchedStrategy::getInstKind(SUnit* SU) { int Opcode = SU->getInstr()->getOpcode(); + if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode)) + return IDFetch; + if (TII->isALUInstr(Opcode)) { return IDAlu; } @@ -255,38 +288,15 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { case AMDGPU::DOT4_eg_pseudo: case AMDGPU::DOT4_r600_pseudo: return IDAlu; - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF: - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TXD: - case AMDGPU::TXD_SHADOW: - return IDFetch; default: - DEBUG( - dbgs() << "other inst: "; - SU->dump(DAG); - ); return IDOther; } } -SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { +SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) { if (Q.empty()) return NULL; - for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); + for (std::vector<SUnit *>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); @@ -302,14 +312,12 @@ SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { } void R600SchedStrategy::LoadAlu() { - ReadyQueue *QSrc = Pending[IDAlu]; - for (ReadyQueue::iterator I = QSrc->begin(), - E = QSrc->end(); I != E; ++I) { - (*I)->NodeQueueId &= ~QSrc->getID(); - AluKind AK = getAluKind(*I); - AvailableAlus[AK].insert(*I); - } - QSrc->clear(); + std::vector<SUnit *> &QSrc = Pending[IDAlu]; + for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { + AluKind AK = getAluKind(QSrc[i]); + AvailableAlus[AK].insert(AvailableAlus[AK].begin(), QSrc[i]); + } + QSrc.clear(); } void R600SchedStrategy::PrepareNextSlot() { @@ -351,27 +359,16 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); - SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); - if (!UnslotedSU) { + if (SlotedSU) return SlotedSU; - } else if (!SlotedSU) { + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); + if (UnslotedSU) AssignSlot(UnslotedSU->getInstr(), Slot); - return UnslotedSU; - } else { - //Determine which one to pick (the lesser one) - if (CompareSUnit()(SlotedSU, UnslotedSU)) { - AvailableAlus[AluAny].insert(UnslotedSU); - return SlotedSU; - } else { - AvailableAlus[IndexToID[Slot]].insert(SlotedSU); - AssignSlot(UnslotedSU->getInstr(), Slot); - return UnslotedSU; - } - } + return UnslotedSU; } bool R600SchedStrategy::isAvailablesAluEmpty() const { - return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() && + return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() && AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() && AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() && AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty(); @@ -391,7 +388,7 @@ SUnit* R600SchedStrategy::pickAlu() { return PopInst(AvailableAlus[AluT_XYZW]); } } - for (unsigned Chan = 0; Chan < 4; ++Chan) { + for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { SUnit *SU = AttemptFillSlot(Chan); @@ -409,14 +406,14 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit* R600SchedStrategy::pickOther(int QID) { SUnit *SU = 0; - ReadyQueue *AQ = Available[QID]; + std::vector<SUnit *> &AQ = Available[QID]; - if (AQ->empty()) { + if (AQ.empty()) { MoveUnits(Pending[QID], AQ); } - if (!AQ->empty()) { - SU = *AQ->begin(); - AQ->remove(AQ->begin()); + if (!AQ.empty()) { + SU = *AQ.begin(); + AQ.erase(AQ.begin()); } return SU; } |