diff options
Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 86 |
1 file changed, 66 insertions, 20 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 21a2b0dd17..bb0f6aab14 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -505,6 +505,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// +static void getVector2OpArgs(SDValue *Arg, SelectionDAG &DAG, + SDValue SlotX0, SDValue SlotY0, SDValue SlotZ0, SDValue SlotW0, + SDValue SlotX1, SDValue SlotY1, SDValue SlotZ1, SDValue SlotW1) { + for (unsigned i = 0; i < 70; i++) + Arg[i] = DAG.getTargetConstant(0, MVT::i32); + // WriteMask + Arg[2] = Arg[19] = Arg[36] = Arg[53] = DAG.getTargetConstant(1, MVT::i32); + Arg[16] = Arg[33] = Arg[50] = Arg[67] = + DAG.getRegister(AMDGPU::PRED_SEL_OFF, MVT::i32); + Arg[6] = SlotX0; + Arg[11] = SlotX1; + Arg[23] = SlotY0; + Arg[28] = SlotY1; + Arg[40] = SlotZ0; + Arg[45] = SlotZ1; + Arg[57] = SlotW0; + Arg[62] = SlotW1; +} + SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); @@ -692,25 +711,52 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19); } case AMDGPUIntrinsic::AMDGPU_dp4: { - SDValue Args[8] = { - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), - DAG.getConstant(0, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), - DAG.getConstant(0, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), - DAG.getConstant(1, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), - DAG.getConstant(1, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), - DAG.getConstant(2, 
MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), - DAG.getConstant(2, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), - DAG.getConstant(3, MVT::i32)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), - DAG.getConstant(3, MVT::i32)) - }; - return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8); + SDValue Op0 = Op.getOperand(1), Op1 = Op.getOperand(2); + SDValue Args[70]; + getVector2OpArgs(Args, DAG, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(0, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(1, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(3, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1, + DAG.getConstant(0, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1, + DAG.getConstant(1, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1, + DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1, + DAG.getConstant(3, MVT::i32))); + MachineSDNode *N = DAG.getMachineNode(AMDGPU::DOT4, DL, MVT::f32, Args); + return SDValue(N, 0); + } + case AMDGPUIntrinsic::AMDGPU_cube: { + SDValue Op0 = Op.getOperand(1); + SDValue Args[70]; + getVector2OpArgs(Args, DAG, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(0, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(1, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(1, 
MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(0, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(2, MVT::i32)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0, + DAG.getConstant(2, MVT::i32))); + MachineSDNode *N = DAG.getMachineNode(AMDGPU::CUBE, DL, + MVT::f32, MVT::f32, MVT::f32, MVT::f32, Args); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, SDValue(N, 0), + SDValue(N, 1), SDValue(N, 2), SDValue(N, 3)); } case Intrinsic::r600_read_ngroups_x: @@ -1854,7 +1900,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, I != E; ++I) Ops.push_back(*I); - if (Opcode == AMDGPU::DOT_4) { + if (TII->uses4Slots(Opcode)) { int OperandIdx[] = { TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), |