diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 47 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Defines.h | 19 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 311 |
4 files changed, 307 insertions, 83 deletions
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp index 807113134d2..a9439b88003 100644 --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp @@ -70,6 +70,11 @@ private: bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRFrame(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectALUDst(SDValue N, SDValue &GPR, SDValue &WriteMask, SDValue &Omod, + SDValue &Rel, SDValue &Clamp); + bool SelectALUSrcOp2(SDValue N, SDValue &GPR, SDValue &Neg, SDValue &Rel, + SDValue &Abs); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -97,6 +102,8 @@ SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } + + bool AMDGPUDAGToDAGISel::SelectADDRParam( SDValue Addr, SDValue& R1, SDValue& R2) { @@ -393,3 +400,43 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, return true; } + +bool AMDGPUDAGToDAGISel::SelectADDRFrame(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::FrameIndex) { + Base = CurDAG->getRegister(AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getConstant(0, MVT::i32); +// unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); + return true; + } + + if (Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { + Base = Addr.getOperand(1); + } else { + Base = Addr; + } + + Offset = CurDAG->getConstant(0, MVT::i32); + return true; + } + + return false; +} + +bool AMDGPUDAGToDAGISel::SelectALUDst(SDValue N, SDValue &GPR, + SDValue &WriteMask, SDValue &Omod, + SDValue &Rel, SDValue &Clamp) +{ + return false; +} + +bool AMDGPUDAGToDAGISel::SelectALUSrcOp2(SDValue N, SDValue &GPR, SDValue &Neg, + SDValue &Rel, SDValue &Abs) +{ + GPR = N; + Neg = CurDAG->getTargetConstant(0, MVT::i32); + Rel = CurDAG->getTargetConstant(0, MVT::i32); + Abs = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h index 8191c6a64a7..962f4888c5f 100644 --- a/lib/Target/AMDGPU/R600Defines.h +++ b/lib/Target/AMDGPU/R600Defines.h @@ -33,8 +33,9 @@ namespace R600_InstFlag { FC = (1 << 3), TRIG = (1 << 4), OP3 = (1 << 5), - VECTOR = (1 << 6) + VECTOR = (1 << 6), //FlagOperand bits 7, 8 + HAS_NATIVE_OPERANDS = (1 << 9) }; } @@ -42,4 +43,20 @@ namespace R600_InstFlag { #define HW_REG_MASK 0x1ff #define HW_CHAN_SHIFT 9 +namespace R600Op2OperandIndex { + enum ROI { + SRC0, + SRC0_NEG, + SRC0_REL, + IMM0, + SRC1, + SRC1_REL, + SRC1_NEG, + IMM1, + INDEX_MODE, + PRED_SEL, + LAST + }; +} + #endif // R600DEFINES_H_ diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index da1de57777a..db208438739 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -112,7 +112,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( TII->addFlag(NewMI, 1, MO_FLAG_NEG); break; } - +#if 0 + case AMDGPU::FrameLoad_i32: + case AMDGPU::FrameStore_i32: + { + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addImm(0) + .addReg(AMDGPU::PRED_SEL_OFF); + break; + } +#endif case AMDGPU::R600_LOAD_CONST: { int64_t RegIndex = MI->getOperand(1).getImm(); diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 4ce256d1298..eba9e72a6c4 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -22,6 +22,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, bit Op3 = 0; bit isVector = 0; bits<2> FlagOperandIdx = 0; + bit HasNativeOperands = 0; bits<11> op_code = inst; //let Inst = inst; @@ -39,6 +40,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, // instruction group let TSFlags{6} = isVector; let TSFlags{8-7} = FlagOperandIdx; + let TSFlags{9} = HasNativeOperands; } class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : @@ -59,21 +61,163 @@ def MEMrr : Operand<iPTR> { def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; +def FRAMEri : Operand<iPTR> { + let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); +} + +class ALUDstBase <ValueType vt> : Operand <vt> { + let MIOperandInfo = (ops R600_Reg32:$gpr, InstFlag:$write_mask, + InstFlag:$omod, InstFlag:$rel, InstFlag:$clamp); +} + +def ALUDst_I32 : ALUDstBase <i32>; +def ALUDst_F32 : ALUDstBase <f32>; + +class ALUSrcOp2Base <ValueType vt, Operand immType> : Operand <vt> { + let MIOperandInfo = (ops R600_Reg32:$gpr, InstFlag:$neg, InstFlag:$rel, + InstFlag:$abs); +} +def ALUSrcOp2_I32 : ALUSrcOp2Base <i32, i32imm>; +def ALUSrcOp2_F32 : ALUSrcOp2Base <f32, f32imm>; + def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRFrame : ComplexPattern<i32, 2, "SelectADDRFrame", [], []>; +class ALUDstPat<ValueType vt> : ComplexPattern<vt, 5, "SelectALUDst", [], []>; +class ALUSrcOp2Pat<ValueType vt> : ComplexPattern<vt, 4, + "SelectALUSrcOp2", [], []>; -class R600_ALU { - bits<7> DST_GPR = 0; - bits<9> SRC0_SEL = 0; - bits<1> SRC0_NEG = 0; - bits<9> SRC1_SEL = 0; - bits<1> SRC1_NEG = 0; - bits<1> CLAMP = 0; - +class R600ALU_Word0 { + field bits<64> Inst; + + bits<11> src0; + bits<1> src0_neg; + bits<1> src0_rel; + bits<11> src1; + bits<1> src1_rel; + bits<1> src1_neg; + bits<3> index_mode; + bits<2> pred_sel; + bits<1> last; + + let Inst{8-0} = src0{8-0}; + let Inst{9} = src0_rel; + let Inst{11-10} = src0{10-9}; + let Inst{12} = src0_neg; + let Inst{21-13} = src1{8-0}; + let Inst{22} = src1_rel; + let Inst{24-23} = src1{10-9}; + let Inst{25} = src1_neg; + let Inst{28-26} = index_mode; + let Inst{30-29} = pred_sel; + let Inst{31} = last; +} + +class R600ALU_Word1_OP2 <bits<11> alu_inst> { + field bits<64> Inst; + + bits<1> src0_abs; + bits<1> src1_abs; + bits<1> update_exec_mask; + bits<1> update_pred; + bits<1> write_mask; + bits<2> omod; + bits<3> bank_swizzle; + bits<9> dst; + bits<1> dst_rel; + bits<1> clamp; + + let Inst{32} = src0_abs; + let Inst{33} = src1_abs; + let Inst{34} = update_exec_mask; + let Inst{35} = update_pred; + let Inst{36} = write_mask; + let Inst{38-37} = omod; + let Inst{49-39} = alu_inst; + let Inst{52-50} = bank_swizzle; + let Inst{59-53} = dst{6-0}; + let Inst{60} = dst_rel; + let Inst{62-61} = dst{8-7}; + let Inst{63} = clamp; } +class R600ALU_OP2 <bits<11> inst> { + field bits<64> Inst; + + bits<11> dst; + bits<11> src0; + bits<1> src0_neg; + bits<1> src0_rel; + bits<1> src0_abs; + bits<11> src1; + bits<1> src1_neg; + bits<1> src1_rel; + bits<1> src1_abs; + bits<2> pred_sel; + + // For now, the following fields are manually encoded in R600MCCodeEmitter: + bits<1> update_exec_mask = 0; + bits<1> update_pred = 0; + bits<1> write_mask = 0; + bits<2> omod = 0; + bits<3> bank_swizzle = 0; + bits<1> dst_rel = 0; + bits<1> clamp = 0; + bits<3> index_mode = 0; + bits<1> last = 0; + + let Inst{8-0} = src0{8-0}; + let Inst{9} = src0_rel; + let Inst{11-10} = src0{10-9}; + let Inst{12} = src0_neg; + let Inst{21-13} = src1{8-0}; + let Inst{22} = src1_rel; + let Inst{24-23} = src1{10-9}; + let Inst{25} = src1_neg; + let Inst{28-26} = index_mode; + let Inst{30-29} = pred_sel; + let Inst{31} = last; + + let Inst{32} = src0_abs; + let Inst{33} = src1_abs; + let Inst{34} = update_exec_mask; + let Inst{35} = update_pred; + let Inst{36} = write_mask; + let Inst{38-37} = omod; + let Inst{49-39} = inst; + let Inst{52-50} = bank_swizzle; + let Inst{59-53} = dst{6-0}; + let Inst{60} = dst_rel; + let Inst{62-61} = dst{10-9}; + let Inst{63} = clamp; +// R600ALU_Word0, +// R600ALU_Word1_OP2 <inst>; +} + +/* +class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { + + bits<1> fog_merge; + bits<10> alu_inst; + + let Inst{37} = fog_merge; + let Inst{39-38} = omod; + let Inst{49-40} = alu_inst; +} + + +class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { + + bits<11> alu_inst; + + let Inst{38-37} = omod; + let Inst{49-39} = alu_inst; +} + +*/ + def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), (ops PRED_SEL_OFF)>; @@ -82,18 +226,33 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src, R600_Pred:$p, variable_ops), - !strconcat(opName, " $dst, $src $p"), - pattern, - itin>{ - bits<7> dst; - bits<9> src; - let Inst{8-0} = src; - let Inst{49-39} = inst; - let Inst{59-53} = dst; - } + InstR600 <0, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, InstFlag:$src0_neg, InstFlag:$src0_rel, + InstFlag:$src0_abs, R600_Pred:$pred_sel, InstFlag:$literal), + !strconcat(opName, " $dst, $src0 (neg$src0_neg rel$src0_rel " + "abs$src0_abs) $pred_sel, $literal"), + pattern, + itin>, + R600ALU_OP2<inst> { + + let src1 = 0; + let src1_rel = 0; + let src1_neg = 0; + let src1_abs = 0; +} + +class R600_1OP_F32 <bits<11> inst, string opName, SDPatternOperator node, + InstrItinClass itin = AnyALU> : + R600_1OP <inst, opName, + [(set R600_Reg32:$dst, (node ALUSrcOp2Pat<f32>:$src0))] +>; + +class R600_1OP_I32 <bits<11> inst, string opName, SDPatternOperator node, + InstrItinClass itin = AnyALU> : + R600_1OP <inst, opName, + [(set R600_Reg32:$dst, (node ALUSrcOp2Pat<i32>:$src0))] +>; class R600_2OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : @@ -383,30 +542,15 @@ def SNE : R600_2OP < COND_NE))] >; -def FRACT : R600_1OP < - 0x10, "FRACT", - [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))] ->; +def FRACT : R600_1OP_F32 <0x10, "FRACT", AMDGPUfract>; -def TRUNC : R600_1OP < - 0x11, "TRUNC", - [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] ->; +def TRUNC : R600_1OP_F32 <0x11, "TRUNC", int_AMDGPU_trunc>; -def CEIL : R600_1OP < - 0x12, "CEIL", - [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))] ->; +def CEIL : R600_1OP_F32 <0x12, "CEIL", fceil>; -def RNDNE : R600_1OP < - 0x13, "RNDNE", - [(set R600_Reg32:$dst, (frint R600_Reg32:$src))] ->; +def RNDNE : R600_1OP_F32 <0x13, "RNDNE", frint>; -def FLOOR : R600_1OP < - 0x14, "FLOOR", - [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))] ->; +def FLOOR : R600_1OP_F32 <0x14, "FLOOR", ffloor>; let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -482,10 +626,7 @@ def XOR_INT : R600_2OP < [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))] >; -def NOT_INT : R600_1OP < - 0x33, "NOT_INT", - [(set R600_Reg32:$dst, (not R600_Reg32:$src))] ->; +def NOT_INT : R600_1OP_I32 <0x33, "NOT_INT", not>; def ADD_INT : R600_2OP < 0x34, "ADD_INT", @@ -739,39 +880,32 @@ multiclass CUBE_Common <bits<11> inst> { } } } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 -class EXP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "EXP_IEEE", - [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] +class EXP_IEEE_Common <bits<11> inst> : R600_1OP_F32 < + inst, "EXP_IEEE", fexp2 >; -class FLT_TO_INT_Common <bits<11> inst> : R600_1OP < - inst, "FLT_TO_INT", - [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] +class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_F32 < + inst, "FLT_TO_INT", fp_to_sint >; -class INT_TO_FLT_Common <bits<11> inst> : R600_1OP < - inst, "INT_TO_FLT", - [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] +class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_I32 < + inst, "INT_TO_FLT", sint_to_fp >; -class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP < - inst, "FLT_TO_UINT", - [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] +class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_F32 < + inst, "FLT_TO_UINT", fp_to_uint >; -class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP < - inst, "UINT_TO_FLT", - [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] +class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_I32 < + inst, "UINT_TO_FLT", uint_to_fp >; class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "LOG_CLAMPED", - [] + inst, "LOG_CLAMPED", [] >; -class LOG_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "LOG_IEEE", - [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))] +class LOG_IEEE_Common <bits<11> inst> : R600_1OP_F32 < + inst, "LOG_IEEE", flog2 >; class LSHL_Common <bits<11> inst> : R600_2OP < @@ -810,28 +944,23 @@ class MULLO_UINT_Common <bits<11> inst> : R600_2OP < >; class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_CLAMPED", - [] + inst, "RECIP_CLAMPED", [] >; -class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_IEEE", - [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] +class RECIP_IEEE_Common <bits<11> inst> : R600_1OP_F32 < + inst, "RECIP_IEEE", int_AMDGPU_rcp >; -class RECIP_UINT_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_INT $dst, $src", - [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] +class RECIP_UINT_Common <bits<11> inst> : R600_1OP_I32 < + inst, "RECIP_UINT", AMDGPUurecip >; -class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP < - inst, "RECIPSQRT_CLAMPED", - [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] +class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_F32 < + inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq >; class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIPSQRT_IEEE", - [] + inst, "RECIPSQRT_IEEE", [] >; class SIN_Common <bits<11> inst> : R600_1OP < @@ -1037,11 +1166,11 @@ let Predicates = [isEGorCayman] in { // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, // which do not need to be truncated since the fp values are 0.0f or 1.0f. // We should look into handling these cases separately. - def : Pat<(fp_to_sint R600_Reg32:$src), - (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; + def : Pat<(fp_to_sint ALUSrcOp2Pat<f32>:$src0), + (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; - def : Pat<(fp_to_uint R600_Reg32:$src), - (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; + def : Pat<(fp_to_uint ALUSrcOp2Pat<f32>:$src0), + (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; def : Pat<(fsqrt R600_Reg32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; @@ -1286,6 +1415,26 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, } +//===----------------------------------------------------------------------===// +// Frame loads and stores +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly =1 in { + +def FrameLoad_i32 : InstR600 <0x0, + (outs R600_Reg32:$dst), (ins FRAMEri:$addr), + "FrameLoad_i32 $dst, $addr", + [(set (i32 R600_Reg32:$dst), (load ADDRFrame:$addr))], NullALU +>; + +def FrameStore_i32 : InstR600 <0x0, + (outs), (ins R600_Reg32:$val, FRAMEri:$addr), + "FrameStore_i32 $val, $addr", + [(store (i32 R600_Reg32:$val), ADDRFrame:$addr)], NullALU +>; + +} // End usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly = 1 + let Predicates = [isCayman] in { let isVector = 1 in { |