diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-02-06 21:29:05 -0500 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-02-06 21:29:05 -0500 |
commit | 331287898d5b0dbd61dd980c6d13e15d1e3c6bb0 (patch) | |
tree | 239bcc24f9006bf7c1620650917dde7dd0a003f4 | |
parent | 422ccac33f346db3c90543a9449de1346641fa1e (diff) |
XXX: Lower PRED_SET directly to PRED_SET_* [branch: r600-structurizer-v2]
-rw-r--r-- | lib/Target/R600/AMDGPUInstructions.td | 10 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 129 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 49 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 120 | ||||
-rw-r--r-- | lib/Target/R600/R600LowerControlFlow.cpp | 31 |
7 files changed, 190 insertions, 155 deletions
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3dee004270d..368ee08de02 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -77,6 +77,16 @@ def COND_LE : PatLeaf < case ISD::SETLE: return true;}}}] >; +def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>; +def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>; +def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>; +def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>; + +def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>; +def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>; +def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>; +def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>; + //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 51b20ba1d07..c485267c6b4 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -115,6 +115,11 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock::iterator I = *MI; + if (TII->isPredicateSetter(MI->getOpcode())) { + LowerPRED_SET(MI, BB, MF); + return BB; + } + switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::SHADER_TYPE: break; @@ -169,60 +174,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); break; - case AMDGPU::PRED_SET: { - // Find the correct insertion point - Occasionally the instruction - // selector will insert instructions between 
the PRED_SET instruction - // and the control flow instruction. This usually happens when - // compiling code like this: - // - // int a = 0 - // if (cond) { - // a = 1; - // } - // - // and the resulting MachineInstr's look something like this: - // - // Pred0 = PRED_SET - // a = MOV ZERO - // R600_IF Pred0 - // a = MOV ONE - // R600_ENDIF Pred0 - // - - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned NumUses = 0; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg), - UE = MachineRegisterInfo::use_end(); - UI != UE; ++UI, ++NumUses) { - MachineInstr *Use = &(*UI); - MachineBasicBlock::iterator InsertPoint = Use; - MachineInstr *PredSet; - - if (NumUses < 1) { - MI->removeFromParent(); - PredSet = MI; - } else { - // PRED_SET can only have one use, so if there is more than one use, we - // need to clone the instruction. - PredSet = MF->CloneMachineInstr(MI); - const TargetRegisterClass *DstRegClass = - MRI.getRegClass(MI->getOperand(0).getReg()); - unsigned NewDst = MRI.createVirtualRegister(DstRegClass); - PredSet->getOperand(0).setReg(NewDst); - UI.getOperand().setReg(NewDst); - } - BB->insert(InsertPoint, PredSet); - } - - return BB; - } case AMDGPU::R600_IF: { // Remove all copies that have been inserted between the R600_IF and BRANCH // instructions. MachineBasicBlock::iterator PredSetI = --I; - assert(PredSetI->getOpcode() == AMDGPU::PRED_SET); + assert(TII->isPredicateSetter(PredSetI->getOpcode())); ++I; // Points to the IF instruction ++I; // Points to instruction after IF @@ -375,6 +333,57 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( return BB; } +/// Find the correct insertion point - Occasionally the instruction +/// selector will insert instructions between the PRED_SET* instruction +/// and the control flow instruction. 
This usually happens when +/// compiling code like this: +/// +/// int a = 0 +/// if (cond) { +/// a = 1; +/// } +/// +/// and the resulting MachineInstr's look something like this: +/// +/// Pred0 = PRED_SET* +/// a = MOV ZERO +/// R600_IF Pred0 +/// a = MOV ONE +/// R600_ENDIF Pred0 +/// +void R600TargetLowering::LowerPRED_SET(MachineInstr *MI, + MachineBasicBlock *BB, + MachineFunction *MF) const { + + MachineRegisterInfo &MRI = MF->getRegInfo(); + int DstIdx = TII->getOperandIdx(*MI, R600Operands::DST); + assert(DstIdx != -1); + unsigned DstReg = MI->getOperand(DstIdx).getReg(); + unsigned NumUses = 0; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg), + UE = MachineRegisterInfo::use_end(); + UI != UE; ++UI, ++NumUses) { + MachineInstr *Use = &(*UI); + MachineBasicBlock::iterator InsertPoint = Use; + MachineInstr *PredSet; + + if (NumUses < 1) { + MI->removeFromParent(); + PredSet = MI; + } else { + // PRED_SET* can only have one use, so if there is more than one use, we + // need to clone the instruction. 
+ PredSet = MF->CloneMachineInstr(MI); + const TargetRegisterClass *DstRegClass = + MRI.getRegClass(DstReg); + unsigned NewDst = MRI.createVirtualRegister(DstRegClass); + PredSet->getOperand(DstIdx).setReg(NewDst); + UI.getOperand().setReg(NewDst); + } + BB->insert(InsertPoint, PredSet); + } +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// @@ -497,9 +506,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const SDValue PredSet; if (Cond.getOpcode() == ISD::SETCC) { bool IsInteger = Cond.getOperand(0).getValueType().isInteger(); - unsigned CondCode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1, - DAG.getTargetConstant(CondCode, MVT::i32), + Cond.getOperand(2), DAG.getTargetConstant(IsInteger, MVT::i1), Cond.getOperand(0), Cond.getOperand(1)); @@ -513,7 +521,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } } PredSet = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1, - DAG.getTargetConstant(CondCode, MVT::i32), + DAG.getCondCode(CondCode), DAG.getTargetConstant(-1, MVT::i1), DAG.getZExtOrTrunc(Cond, DL, MVT::i32), DAG.getConstant(0, MVT::i32)); @@ -737,10 +745,8 @@ SDValue R600TargetLowering::LowerBR_COND(SDValue Op, SelectionDAG &DAG) const { CCOpcode = ISD::getSetCCInverse(CCOpcode, IsInteger); } - unsigned CondCode = CCOpcode; - Cond = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1, - DAG.getTargetConstant(CCOpcode, MVT::i32), + DAG.getCondCode(CCOpcode), DAG.getTargetConstant(IsInteger, MVT::i1), Cond.getOperand(0), // LHS Cond.getOperand(1)); // RHS @@ -750,7 +756,7 @@ SDValue R600TargetLowering::LowerBR_COND(SDValue Op, SelectionDAG &DAG) const { CCOpcode = ISD::getSetCCInverse(CCOpcode, true); } Cond = DAG.getNode(AMDGPUISD::PRED_SET, DL, MVT::i1, - DAG.getTargetConstant(CCOpcode, MVT::i32), + 
DAG.getCondCode(CCOpcode), DAG.getTargetConstant(1, MVT::i1), DAG.getZExtOrTrunc(Cond, DL, MVT::i32), DAG.getConstant(0, MVT::i32)); @@ -823,11 +829,11 @@ SDValue R600TargetLowering::LowerCopyToReg(SDValue Op, } case AMDGPUISD::PRED_SET: { - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value.getOperand(0)); - ISD::CondCode CC = (ISD::CondCode)C->getZExtValue(); + CondCodeSDNode *CondCode = dyn_cast<CondCodeSDNode>(Value.getOperand(0)); + ISD::CondCode CCOp = CondCode->get(); SDValue SetCC = DAG.getSetCC(DL, MVT::i1, Value.getOperand(2), Value.getOperand(3), - CC); + CCOp); if (SetCC.getOpcode() == ISD::SETCC) { NewValue = LowerSETCC(SetCC, DAG); } else { @@ -1525,10 +1531,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } - ISD::CondCode OldCondCode = (ISD::CondCode) - (dyn_cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()); + ISD::CondCode OldCondCode = cast<CondCodeSDNode>(N->getOperand(0))->get(); ISD::CondCode NewCondCode = - cast<CondCodeSDNode>(SelectCC->getOperand(4))->get(); + cast<CondCodeSDNode>(SelectCC->getOperand(4))->get(); if (OldCondCode == ISD::SETEQ) { NewCondCode = ISD::getSetCCInverse(NewCondCode, @@ -1539,7 +1544,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(AMDGPUISD::PRED_SET, N->getDebugLoc(), N->getValueType(0), - DAG.getTargetConstant(NewCondCode, MVT::i32), + DAG.getCondCode(NewCondCode), DAG.getTargetConstant( SelectCC.getOperand(0).getValueType().isInteger(), MVT::i1), SelectCC.getOperand(0), diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 85734daac84..6cfd8f33aec 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -52,6 +52,8 @@ private: void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; + void LowerPRED_SET(MachineInstr *MI, MachineBasicBlock *BB, + MachineFunction *MF) const; SDValue LowerBR_CC(SDValue Op, 
SelectionDAG &DAG) const; SDValue LowerBR_COND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 83509d695a1..2b1648bea24 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -145,11 +145,9 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); } -static bool -isPredicateSetter(unsigned Opcode) { +bool R600InstrInfo::isPredicateSetter(unsigned Opcode) const { switch (Opcode) { case AMDGPU::PRED_X: - case AMDGPU::PRED_SET: case AMDGPU::PRED_SETE: case AMDGPU::PRED_SETGT: case AMDGPU::PRED_SETGE: @@ -158,20 +156,52 @@ isPredicateSetter(unsigned Opcode) { case AMDGPU::PRED_SETGT_INT: case AMDGPU::PRED_SETGE_INT: case AMDGPU::PRED_SETNE_INT: + case AMDGPU::PRED_SETGT_UINT: + case AMDGPU::PRED_SETGE_UINT: return true; default: return false; } } +ISD::CondCode R600InstrInfo::getCondCodeForPRED_SET(unsigned Opcode) const { + assert(isPredicateSetter(Opcode)); + switch (Opcode) { + case AMDGPU::PRED_SETE: return ISD::SETOEQ; + case AMDGPU::PRED_SETGT: return ISD::SETOGT; + case AMDGPU::PRED_SETGE: return ISD::SETOGE; + case AMDGPU::PRED_SETNE: return ISD::SETONE; + case AMDGPU::PRED_SETE_INT: return ISD::SETEQ; + case AMDGPU::PRED_SETGT_INT: return ISD::SETGT; + case AMDGPU::PRED_SETGE_INT: return ISD::SETGE; + case AMDGPU::PRED_SETNE_INT: return ISD::SETNE; + case AMDGPU::PRED_SETGT_UINT: return ISD::SETUGT; + case AMDGPU::PRED_SETGE_UINT: return ISD::SETUGE; + } + return ISD::SETCC_INVALID; + ; +} + +bool R600InstrInfo::PRED_SETTypeIsInteger(unsigned Opcode) const { + assert(isPredicateSetter(Opcode)); + switch (Opcode) { + default: return true; + case AMDGPU::PRED_SETE: + case AMDGPU::PRED_SETGT: + case AMDGPU::PRED_SETGE: + case AMDGPU::PRED_SETNE: + return false; + } +} + static MachineInstr * 
findFirstPredicateSetterFrom(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { while (I != MBB.begin()) { --I; MachineInstr *MI = I; - if (isPredicateSetter(MI->getOpcode())) - return MI; +// if (isPredicateSetter(MI->getOpcode())) +// return MI; } return NULL; @@ -192,6 +222,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { return true; +#if 0 // Most of the following comes from the ARM implementation of AnalyzeBranch // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); @@ -262,6 +293,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Otherwise, can't handle this. return true; +#endif } int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { @@ -280,6 +312,8 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { + return true; +#if 0 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); if (FBB == 0) { @@ -342,11 +376,13 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); return 2; } +#endif } unsigned R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - +return true; +#if 0 // Note : we leave PRED* instructions there. // They may be needed when predicating instructions. 
@@ -398,6 +434,7 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { } return 2; */ +#endif } bool diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 8fdf7004f3f..a55fb30496f 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -67,6 +67,10 @@ namespace llvm { DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; + bool isPredicateSetter(unsigned Opcode) const; + ISD::CondCode getCondCodeForPRED_SET(unsigned Opcode) const; + bool PRED_SETTypeIsInteger(unsigned Opcode) const; + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; bool isConditionalBranch(const MachineInstr *MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 2ad2cfc26d3..e4450723b30 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// include "R600Intrinsics.td" +include "R600InstrInfo.td" class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> @@ -353,6 +354,16 @@ class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, R600_Reg32:$src1))] >; +class R600_PredSet <bits<11> inst, string opName, PatLeaf CC, ValueType CmpVT> + : R600_2OP <inst, opName, + [(set R600_PredReg:$dst, (R600predset CC, (i1 timm), + (CmpVT R600_Reg32:$src0), + R600_Reg32:$src1))]> { + let OutOperandList = (outs R600_PredReg:$dst); + let usesCustomInserter = 1; + let isTerminator = 1; +} + // If you add our change the operands for R600_3OP instructions, you must // also update the R600Op3OperandIndex::ROI enum in R600Defines.h, // R600InstrInfo::buildDefaultInstruction(), and @@ -677,36 +688,6 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } -def SDTIf : SDTypeProfile<1, 2, [ - SDTCisVT<2, OtherVT> -]>; - -def SDTLoop : 
SDTypeProfile<0, 1, [ - SDTCisVT<0, OtherVT> -]>; - -def SDTIfBreak : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisInt<1> -]>; - -def SDTElse : SDTypeProfile<0, 2, [ - SDTCisVT<1, OtherVT> -]>; - -def SDTPredSet : SDTypeProfile<1, 4, [ // setcc - SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisSameAs<3, 4> -]>; - -def R600if : SDNode<"AMDGPUISD::IF", SDTIf, [SDNPHasChain]>; -def R600ifbreak : SDNode<"AMDGPUISD::IFBREAK", SDTIfBreak, [SDNPHasChain]>; -def R600else : SDNode<"AMDGPUISD::ELSE", SDTElse, [SDNPHasChain]>; -def R600loop : SDNode<"AMDGPUISD::LOOP", SDTLoop, [SDNPHasChain]>; -def R600elsebreak : SDNode<"AMDGPUISD::ELSEBREAK", - SDTypeProfile<0, 0, []>, [SDNPHasChain]>; -def R600predset : SDNode<"AMDGPUISD::PRED_SET", SDTPredSet>; -def R600endif : SDNode<"AMDGPUISD::ENDIF", - SDTypeProfile<0, 0, []>, [SDNPHasChain]>; - let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -801,10 +782,12 @@ class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; -def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>; -def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>; -def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>; -def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>; +def PRED_SETGT_UINT : R600_PredSet <0x1E, "PRED_SETGT_UINT", COND_UGT, i32>; +def PRED_SETGE_UINT : R600_PredSet <0x1F, "PRED_SETGE_UINT", COND_UGE, i32>; +def PRED_SETE : R600_PredSet <0x20, "PRED_SETE" , COND_EQ, f32>; +def PRED_SETGT : R600_PredSet <0x21, "PRED_SETGT", COND_GT, f32>; +def PRED_SETGE : R600_PredSet <0x22, "PRED_SETGE", COND_GE, f32>; +def PRED_SETNE : R600_PredSet <0x23, "PRED_SETNE", COND_NE, f32>; let hasSideEffects = 1 in { @@ -871,10 +854,10 @@ def SETGE_UINT : R600_2OP < (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] >; -def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; -def PRED_SETGT_INT : R600_2OP <0x43, 
"PRED_SETGT_INT", []>; -def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>; -def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; +def PRED_SETE_INT : R600_PredSet <0x42, "PRED_SETE_INT" , COND_EQ, i32>; +def PRED_SETGT_INT : R600_PredSet <0x43, "PRED_SETGT_INT", COND_SGT, i32>; +def PRED_SETGE_INT : R600_PredSet <0x44, "PRED_SETGE_INT", COND_SGE, i32>; +def PRED_SETNE_INT : R600_PredSet <0x45, "PRED_SETNE_INT", COND_NE, i32>; def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", @@ -1617,19 +1600,6 @@ def PRED_X : InstR600 < let FlagOperandIdx = 3; } -let usesCustomInserter = 1, isTerminator = 1 in { - -def PRED_SET : InstR600 < - 0, (outs R600_Reg1:$dst), - (ins i32imm:$cc, i1imm:$isint, PRED_SET_FLAG:$flags, R600_Reg32:$lhs, R600_Reg32:$rhs), "", - [(set R600_Reg1:$dst, - (R600predset (i32 timm:$cc), (i1 timm:$isint), (i32 R600_Reg32:$lhs), R600_Reg32:$rhs))], - NullALU> { - let FlagOperandIdx = 3; -} - -} - let isTerminator = 1, isBranch = 1, isBarrier = 1 in { def JUMP : InstR600 <0x10, @@ -1838,15 +1808,16 @@ def R600_BREAK : AMDGPUInst < [] >; +let hasSideEffects = 1 in { + + def R600_PREDICATED_BREAK : AMDGPUInst < (outs R600_PredReg:$dst), (ins R600_PredReg:$cond), "R600_PREDICATED_BREAK $dst, $cond", - [] + [(set R600_PredReg:$dst, (R600ifbreak R600_PredReg:$cond))] >; -let hasSideEffects = 1 in { - def R600_ELSEBREAK : AMDGPUInst < (outs), (ins), @@ -1966,24 +1937,19 @@ def KIL : Pat < (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) >; -// PRED_SET floating-point comparison -def : Pat < - (R600predset (i32 timm:$cc), (i1 timm:$isint), (f32 R600_Reg32:$lhs), R600_Reg32:$rhs), - (PRED_SET imm:$cc, imm:$isint, R600_Reg32:$lhs, R600_Reg32:$rhs) ->; - -// R600ifbreak patterns: -class IfBreak<ValueType CmpVT> : Pat < - (i1 (R600ifbreak (i1 (R600predset (i32 timm:$cc), (i1 timm:$isint), - (CmpVT R600_Reg32:$src0), - R600_Reg32:$src1)))), - (R600_PREDICATED_BREAK (i1 (PRED_SET imm:$cc, imm:$isint, - R600_Reg32:$src0, - R600_Reg32:$src1))) +// 
PRED_SET patterns +class PredSetRevPat <Instruction PredSet, PatLeaf CC, ValueType CmpVT> + : Pat < + (R600predset CC, (i1 timm), (CmpVT R600_Reg32:$lhs), R600_Reg32:$rhs), + (PredSet R600_Reg32:$rhs, R600_Reg32:$lhs) >; -def : IfBreak<i32>; -def : IfBreak<f32>; +def : PredSetRevPat <PRED_SETGT, COND_LT, f32>; +def : PredSetRevPat <PRED_SETGE, COND_LE, f32>; +def : PredSetRevPat <PRED_SETGT_INT, COND_SLT, i32>; +def : PredSetRevPat <PRED_SETGE_INT, COND_SLE, i32>; +def : PredSetRevPat <PRED_SETGT_UINT, COND_ULT, i32>; +def : PredSetRevPat <PRED_SETGE_UINT, COND_ULE, i32>; // SGT Reverse args def : Pat < @@ -2033,7 +1999,7 @@ def : Pat < (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) >; -// The next two patterns are special cases for handling 'true if ordered' and +// The next six patterns are special cases for handling 'true if ordered' and // 'true if unordered' conditionals. The assumption here is that the behavior of // SETE and SNE conforms to the Direct3D 10 rules for floating point values // described here: @@ -2053,6 +2019,12 @@ def : Pat < (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) >; +//PRED_SETE - 'true if ordered' +def : Pat < + (R600predset SETO, (i1 timm), (f32 R600_Reg32:$src0), R600_Reg32:$src1), + (PRED_SETE R600_Reg32:$src0, R600_Reg32:$src1) +>; + //SNE - 'true if unordered' def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), @@ -2065,6 +2037,12 @@ def : Pat < (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) >; +//PRED_SETNE - 'true if unordered' +def : Pat < + (R600predset SETUO, (i1 timm), (f32 R600_Reg32:$src0), R600_Reg32:$src1), + (PRED_SETNE R600_Reg32:$src0, R600_Reg32:$src1) +>; + // i32 = sign_extend (i1 (truncate i32)) def : Pat < (i32 (sext(i1 (trunc (i32 R600_Reg32:$src0))))), diff --git a/lib/Target/R600/R600LowerControlFlow.cpp b/lib/Target/R600/R600LowerControlFlow.cpp index 9f079386634..f6ed06ed026 100644 --- a/lib/Target/R600/R600LowerControlFlow.cpp +++ 
b/lib/Target/R600/R600LowerControlFlow.cpp @@ -130,7 +130,6 @@ static bool isPredicateSetter(unsigned Opcode) { switch (Opcode) { case AMDGPU::PRED_X: - case AMDGPU::PRED_SET: case AMDGPU::PRED_SETE: case AMDGPU::PRED_SETGT: case AMDGPU::PRED_SETGE: @@ -365,15 +364,19 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { // Lower the PRED_SET instruction to the correct hardware instruction. MachineInstr &PredSet = *(++I); MachineInstr *HWPredSet; - if (PredSet.getOpcode() == AMDGPU::PRED_SET) { - unsigned CC = PredSet.getOperand(1).getImm(); - unsigned IsInteger = PredSet.getOperand(2).getImm(); - uint64_t Flags = PredSet.getOperand(3).getImm(); - unsigned Src0Reg = PredSet.getOperand(4).getReg(); - unsigned Src1Reg = PredSet.getOperand(5).getReg(); + if (TII->isPredicateSetter(PredSet.getOpcode())) { + ISD::CondCode CC = TII->getCondCodeForPRED_SET(PredSet.getOpcode()); + bool IsInteger = TII->PRED_SETTypeIsInteger(PredSet.getOpcode()); + int DstIdx = TII->getOperandIdx(PredSet, R600Operands::DST); + int Src0Idx = TII->getOperandIdx(PredSet, R600Operands::SRC0); + int Src1Idx = TII->getOperandIdx(PredSet, R600Operands::SRC1); + assert(DstIdx != -1 && Src0Idx != -1 && Src1Idx != -1); + unsigned DstReg = PredSet.getOperand(DstIdx).getReg(); + unsigned Src0Reg = PredSet.getOperand(Src0Idx).getReg(); + unsigned Src1Reg = PredSet.getOperand(Src1Idx).getReg(); if (InversePred) { - CC = ISD::getSetCCInverse((ISD::CondCode)CC, IsInteger); + CC = ISD::getSetCCInverse(CC, IsInteger); } bool Swap = false; @@ -382,11 +385,10 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { std::swap(Src0Reg, Src1Reg); } - HWPredSet = TII->buildDefaultInstruction(MBB, &PredSet, Opcode, - PredSet.getOperand(0).getReg(), // dst - Src0Reg, // src0 - Src1Reg); // src1 - PredSet.eraseFromParent(); + PredSet.setDesc(TII->get(Opcode)); + PredSet.getOperand(Src0Idx).setReg(Src0Reg); + PredSet.getOperand(Src1Idx).setReg(Src1Reg); + HWPredSet = 
&PredSet; } else { HWPredSet = TII->buildDefaultInstruction(MBB, &MI, AMDGPU::PRED_SETNE_INT, @@ -395,10 +397,7 @@ bool R600LowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::ZERO); } TII->setImmOperand(HWPredSet, R600Operands::WRITE, 0); -// if (Flags & MO_FLAG_PUSH) { TII->setImmOperand(HWPredSet, R600Operands::UPDATE_EXEC_MASK, 1); - // } else { -// TII->setImmOperand(HWPredSet, R600Operands::UPDATE_PREDICATE, 1); break; } |