From 4accdea1dc1d434349575b39b5670b747126260a Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 26 Jun 2013 18:09:58 +0200 Subject: R600: Support schedule and packetization of trans-only inst --- lib/Target/R600/R600InstrInfo.cpp | 185 ++++++++++++++++++++++++------- lib/Target/R600/R600InstrInfo.h | 32 ++++-- lib/Target/R600/R600Instructions.td | 1 + lib/Target/R600/R600MachineScheduler.cpp | 25 +++-- lib/Target/R600/R600MachineScheduler.h | 1 + lib/Target/R600/R600Packetizer.cpp | 90 +++++++++------ lib/Target/R600/R600RegisterInfo.td | 1 + test/CodeGen/R600/fdiv.ll | 8 +- test/CodeGen/R600/fp_to_sint.ll | 8 +- test/CodeGen/R600/fp_to_uint.ll | 8 +- test/CodeGen/R600/llvm.cos.ll | 2 +- test/CodeGen/R600/llvm.pow.ll | 4 +- test/CodeGen/R600/llvm.sin.ll | 2 +- 13 files changed, 261 insertions(+), 106 deletions(-) diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 354f039c1e8..974a5794505 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { std::vector > R600InstrInfo::ExtractSrcs(MachineInstr *MI, - const DenseMap &PV) - const { + const DenseMap &PV, + unsigned &ConstCount) const { + ConstCount = 0; const SmallVector, 3> Srcs = getSrcs(MI); const std::pair DummyPair(-1, 0); std::vector > Result; @@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, for (unsigned n = Srcs.size(); i < n; ++i) { unsigned Reg = Srcs[i].first->getReg(); unsigned Index = RI.getEncodingValue(Reg) & 0xff; - unsigned Chan = RI.getHWRegChan(Reg); if (Reg == AMDGPU::OQAP) { Result.push_back(std::pair(Index, 0)); } - if (Index > 127) { - Result.push_back(DummyPair); + if (PV.find(Reg) != PV.end()) { + // 255 is used to tells its a PS/PV reg + Result.push_back(std::pair(255, 0)); continue; } - if (PV.find(Reg) != PV.end()) { + if (Index > 127) { + ConstCount++; Result.push_back(DummyPair); continue; } + unsigned Chan = RI.getHWRegChan(Reg); Result.push_back(std::pair(Index, Chan)); } for (; i < 3; ++i) @@ -305,23 +308,51 @@ Swizzle(std::vector > Src, return Src; } -bool -R600InstrInfo::isLegal( - const std::vector > > &IGSrcs, - const std::vector &Swz, - unsigned CheckedSize) const { +static unsigned +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { + switch (Swz) { + case R600InstrInfo::ALU_VEC_012_SCL_210: { + unsigned Cycles[3] = { 2, 1, 0}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_021_SCL_122: { + unsigned Cycles[3] = { 1, 2, 2}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_120_SCL_212: { + unsigned Cycles[3] = { 2, 1, 2}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_102_SCL_221: { + unsigned Cycles[3] = { 2, 2, 1}; + return Cycles[Op]; + } + default: + llvm_unreachable("Wrong Swizzle for Trans Slot"); + return 0; + } +} + +/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed +/// in the same Instruction Group while meeting read port limitations given a +/// Swz swizzle sequence. +unsigned R600InstrInfo::isLegalUpTo( + const std::vector > > &IGSrcs, + const std::vector &Swz, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const { int Vector[4][3]; memset(Vector, -1, sizeof(Vector)); - for (unsigned i = 0; i < CheckedSize; i++) { + for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { const std::vector > &Srcs = Swizzle(IGSrcs[i], Swz[i]); for (unsigned j = 0; j < 3; j++) { const std::pair &Src = Srcs[j]; - if (Src.first < 0) + if (Src.first < 0 || Src.first == 255) continue; if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { - if (Swz[i] != R600InstrInfo::ALU_VEC_012 && - Swz[i] != R600InstrInfo::ALU_VEC_021) { + if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && + Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { // The value from output queue A (denoted by register OQAP) can // only be fetched during the first cycle. return false; @@ -332,51 +363,126 @@ R600InstrInfo::isLegal( if (Vector[Src.second][j] < 0) Vector[Src.second][j] = Src.first; if (Vector[Src.second][j] != Src.first) - return false; + return i; } } - return true; + // Now check Trans Alu + for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { + const std::pair &Src = TransSrcs[i]; + unsigned Cycle = getTransSwizzle(TransSwz, i); + if (Src.first < 0) + continue; + if (Src.first == 255) + continue; + if (Vector[Src.second][Cycle] < 0) + Vector[Src.second][Cycle] = Src.first; + if (Vector[Src.second][Cycle] != Src.first) + return IGSrcs.size() - 1; + } + return IGSrcs.size(); } -bool -R600InstrInfo::recursiveFitsFPLimitation( - const std::vector > > &IGSrcs, - std::vector &SwzCandidate, - unsigned Depth) const { - if (!isLegal(IGSrcs, SwzCandidate, Depth)) +/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next +/// (in lexicographic term) swizzle sequence assuming that all swizzles after +/// Idx can be skipped +static bool +NextPossibleSolution( + std::vector &SwzCandidate, + unsigned Idx) { + assert(Idx < SwzCandidate.size()); + int ResetIdx = Idx; + while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) + ResetIdx --; + for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { + SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; + } + if (ResetIdx == -1) return false; - if (IGSrcs.size() == Depth) - return true; - unsigned i = SwzCandidate[Depth]; - for (; i < 6; i++) { - SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i; - if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1)) + SwzCandidate[ResetIdx]++; + return true; +} + +/// Enumerate all possible Swizzle sequence to find one that can meet all +/// read port requirements. +bool R600InstrInfo::FindSwizzleForVectorSlot( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const { + unsigned ValidUpTo = 0; + do { + ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); + if (ValidUpTo == IGSrcs.size()) return true; - } - SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012; + } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); return false; } +/// Instructions in Trans slot can't read gpr at cycle 0 if they also read +/// a const, and can't read a gpr at cycle 1 if they read 2 const. +static bool +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, + const std::vector > &TransOps, + unsigned ConstCount) { + for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { + const std::pair &Src = TransOps[i]; + unsigned Cycle = getTransSwizzle(TransSwz, i); + if (Src.first < 0) + continue; + if (ConstCount > 0 && Cycle == 0) + return false; + if (ConstCount > 1 && Cycle == 1) + return false; + } + return true; +} + bool R600InstrInfo::fitsReadPortLimitations(const std::vector &IG, - const DenseMap &PV, - std::vector &ValidSwizzle) + const DenseMap &PV, + std::vector &ValidSwizzle, + bool isLastAluTrans) const { //Todo : support shared src0 - src1 operand std::vector > > IGSrcs; ValidSwizzle.clear(); + unsigned ConstCount; + BankSwizzle TransBS; for (unsigned i = 0, e = IG.size(); i < e; ++i) { - IGSrcs.push_back(ExtractSrcs(IG[i], PV)); + IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), AMDGPU::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) IG[i]->getOperand(Op).getImm()); } - bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle); - if (!Result) - return false; - return true; + std::vector > TransOps; + if (!isLastAluTrans) + return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); + + TransOps = IGSrcs.back(); + IGSrcs.pop_back(); + ValidSwizzle.pop_back(); + + static const R600InstrInfo::BankSwizzle TransSwz[] = { + ALU_VEC_012_SCL_210, + ALU_VEC_021_SCL_122, + ALU_VEC_120_SCL_212, + ALU_VEC_102_SCL_221 + }; + for (unsigned i = 0; i < 4; i++) { + TransBS = TransSwz[i]; + if (!isConstCompatible(TransBS, TransOps, ConstCount)) + continue; + bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, + TransBS); + if (Result) { + ValidSwizzle.push_back(TransBS); + return true; + } + } + + return false; } @@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector &Consts) } bool -R600InstrInfo::canBundle(const std::vector &MIs) const { +R600InstrInfo::fitsConstReadLimitations(const std::vector &MIs) + const { std::vector Consts; for (unsigned i = 0, n = MIs.size(); i < n; i++) { MachineInstr *MI = MIs[i]; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index a6add8382cf..1ba4160747b 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -84,26 +84,38 @@ namespace llvm { SmallVector, 3> getSrcs(MachineInstr *MI) const; - bool isLegal( - const std::vector > > &IGSrcs, - const std::vector &Swz, - unsigned CheckedSize) const; - bool recursiveFitsFPLimitation( - const std::vector > > &IGSrcs, - std::vector &SwzCandidate, - unsigned Depth = 0) const; + unsigned isLegalUpTo( + const std::vector > > &IGSrcs, + const std::vector &Swz, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const; + + bool FindSwizzleForVectorSlot( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const; /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210 /// returns true and the first (in lexical order) BankSwizzle affectation /// starting from the one already provided in the Instruction Group MIs that /// fits Read Port limitations in BS if available. Otherwise returns false /// and undefined content in BS. + /// isLastAluTrans should be set if the last Alu of MIs will be executed on + /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to + /// apply to the last instruction. /// PV holds GPR to PV registers in the Instruction Group MIs. bool fitsReadPortLimitations(const std::vector &MIs, const DenseMap &PV, - std::vector &BS) const; + std::vector &BS, + bool isLastAluTrans) const; + + /// An instruction group can only access 2 channel pair (either [XY] or [ZW]) + /// from KCache bank on R700+. This function check if MI set in input meet + /// this limitations + bool fitsConstReadLimitations(const std::vector &) const; + /// Same but using const index set instead of MI set. bool fitsConstReadLimitations(const std::vector&) const; - bool canBundle(const std::vector &) const; /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index fd585f8b7e9..862080d9ca4 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1489,6 +1489,7 @@ let hasSideEffects = 1 in { def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; + let TransOnly = 0; } def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 7e28f9dde47..0dc0365926e 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { MRI = &DAG->MRI; CurInstKind = IDOther; CurEmitted = 0; - OccupedSlotsMask = 15; + OccupedSlotsMask = 31; InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); InstKindLimit[IDOther] = 32; @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (NextInstKind != CurInstKind) { DEBUG(dbgs() << "Instruction Type Switch\n"); if (NextInstKind != IDAlu) - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; CurEmitted = 0; CurInstKind = NextInstKind; } @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg, R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { MachineInstr *MI = SU->getInstr(); + if (TII->isTransOnly(MI)) + return AluTrans; + switch (MI->getOpcode()) { case AMDGPU::PRED_X: return AluPredX; @@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector &Q) { It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->canBundle(InstructionsGroupCandidate)) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; @@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const { return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() + AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() + AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() + - AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size(); + AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() + + AvailableAlus[AluPredX].size(); } SUnit* R600SchedStrategy::pickAlu() { @@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() { if (!OccupedSlotsMask) { // Bottom up scheduling : predX must comes first if (!AvailableAlus[AluPredX].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluPredX]); } // Flush physical reg copies (RA will discard them) if (!AvailableAlus[AluDiscarded].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluDiscarded]); } // If there is a T_XYZW alu available, use it if (!AvailableAlus[AluT_XYZW].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 15; return PopInst(AvailableAlus[AluT_XYZW]); } } + bool TransSlotOccuped = OccupedSlotsMask & 16; + if (!TransSlotOccuped) { + if (!AvailableAlus[AluTrans].empty()) { + OccupedSlotsMask |= 16; + return PopInst(AvailableAlus[AluTrans]); + } + } for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index aae8b3f4eb8..f8965d8998a 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy { AluT_W, AluT_XYZW, AluPredX, + AluTrans, AluDiscarded, // LLVM Instructions that are going to be eliminated AluLast }; diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 6fc15deb5c9..5ee51faea87 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -77,8 +77,6 @@ private: do { if (TII->isPredicated(BI)) continue; - if (TII->isTransOnly(BI)) - continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; @@ -87,6 +85,10 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); + if (TII->isTransOnly(BI)) { + Result[Dst] = AMDGPU::PS; + continue; + } if (BI->getOpcode() == AMDGPU::DOT4_r600 || BI->getOpcode() == AMDGPU::DOT4_eg) { Result[Dst] = AMDGPU::PV_X; @@ -157,10 +159,6 @@ public: return true; if (!TII->isALUInstr(MI->getOpcode())) return true; - if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY) - return true; - if (TII->isTransOnly(MI)) - return true; if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) return true; return false; @@ -170,7 +168,7 @@ public: // together. bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); - if (getSlot(MII) <= getSlot(MIJ)) + if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII)) return false; // Does MII and MIJ share the same pred_sel ? int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), @@ -204,11 +202,16 @@ public: MI->getOperand(LastOp).setImm(Bit); } - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + bool isBundlableWithCurrentPMI(MachineInstr *MI, + const DenseMap &PV, + std::vector &BS, + bool &isTransSlot) { + isTransSlot = TII->isTransOnly(MI); + + // Are the Constants limitations met ? CurrentPacketMIs.push_back(MI); - bool FitsConstLimits = TII->canBundle(CurrentPacketMIs); - DEBUG( - if (!FitsConstLimits) { + if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { + DEBUG( dbgs() << "Couldn't pack :\n"; MI->dump(); dbgs() << "with the following packets :\n"; @@ -217,14 +220,15 @@ public: dbgs() << "\n"; } dbgs() << "because of Consts read limitations\n"; - }); - const DenseMap &PV = - getPreviousVector(CurrentPacketMIs.front()); - std::vector BS; - bool FitsReadPortLimits = - TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS); - DEBUG( - if (!FitsReadPortLimits) { + ); + CurrentPacketMIs.pop_back(); + return false; + } + + // Is there a BankSwizzle set that meet Read Port limitations ? + if (!TII->fitsReadPortLimitations(CurrentPacketMIs, + PV, BS, isTransSlot)) { + DEBUG( dbgs() << "Couldn't pack :\n"; MI->dump(); dbgs() << "with the following packets :\n"; @@ -233,25 +237,43 @@ public: dbgs() << "\n"; } dbgs() << "because of Read port limitations\n"; - }); - bool isBundlable = FitsConstLimits && FitsReadPortLimits; - if (isBundlable) { + ); + CurrentPacketMIs.pop_back(); + return false; + } + + CurrentPacketMIs.pop_back(); + return true; + } + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator FirstInBundle = + CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); + const DenseMap &PV = + getPreviousVector(FirstInBundle); + std::vector BS; + bool isTransSlot; + + if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { MachineInstr *MI = CurrentPacketMIs[i]; - unsigned Op = TII->getOperandIdx(MI->getOpcode(), - AMDGPU::OpName::bank_swizzle); - MI->getOperand(Op).setImm(BS[i]); + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + AMDGPU::OpName::bank_swizzle); + MI->getOperand(Op).setImm(BS[i]); } + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + AMDGPU::OpName::bank_swizzle); + MI->getOperand(Op).setImm(BS.back()); + if (!CurrentPacketMIs.empty()) + setIsLastBit(CurrentPacketMIs.back(), 0); + substitutePV(MI, PV); + MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); + if (isTransSlot) { + endPacket(llvm::next(It)->getParent(), llvm::next(It)); + } + return It; } - CurrentPacketMIs.pop_back(); - if (!isBundlable) { - endPacket(MI->getParent(), MI); - substitutePV(MI, getPreviousVector(MI)); - return VLIWPacketizerList::addToPacket(MI); - } - if (!CurrentPacketMIs.empty()) - setIsLastBit(CurrentPacketMIs.back(), 0); - substitutePV(MI, PV); + endPacket(MI->getParent(), MI); return VLIWPacketizerList::addToPacket(MI); } }; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 60a93e3f6c2..66aa91628e1 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">; def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">; def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">; def PV_W : R600RegWithChan<"PV.W", 254, "W">; +def PS: R600Reg<"PS", 255>; def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll index 003590bb53d..f6eb6a66948 100644 --- a/test/CodeGen/R600/fdiv.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index f5716e1d47e..5a608fdf4a7 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fp_to_sint_v4i32 -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index 1c3c0c62cf5..b07e286f43e 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fp_to_uint_v4i32 -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 9b281670704..b444fa782be 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 14220837048..0f51cf46f59 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -1,8 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index 803dc2d6deb..09cc3d2c52d 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) -- cgit v1.2.3