summary refs log tree commit diff
path: root/lib/Target/R600/R600ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp 86
1 files changed, 66 insertions, 20 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 21a2b0dd17..bb0f6aab14 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -505,6 +505,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
+static void getVector2OpArgs(SDValue *Arg, SelectionDAG &DAG, // Fills Arg[0..69]; Arg must have room for at least 70 SDValues.
+ SDValue SlotX0, SDValue SlotY0, SDValue SlotZ0, SDValue SlotW0, // Stored at Arg[6], Arg[23], Arg[40], Arg[57].
+ SDValue SlotX1, SDValue SlotY1, SDValue SlotZ1, SDValue SlotW1) { // Stored at Arg[11], Arg[28], Arg[45], Arg[62].
+ for (unsigned i = 0; i < 70; i++) // Default every entry to the i32 target constant 0.
+ Arg[i] = DAG.getTargetConstant(0, MVT::i32);
+ // WriteMask: four entries spaced 17 indices apart (presumably one per X/Y/Z/W slot), each set to 1.
+ Arg[2] = Arg[19] = Arg[36] = Arg[53] = DAG.getTargetConstant(1, MVT::i32);
+ Arg[16] = Arg[33] = Arg[50] = Arg[67] = // Same 17-index spacing; all four share one PRED_SEL_OFF register node.
+ DAG.getRegister(AMDGPU::PRED_SEL_OFF, MVT::i32);
+ Arg[6] = SlotX0; // NOTE(review): the *0/*1 pairs look like the first/second source operand of each slot - confirm against the instruction's operand list.
+ Arg[11] = SlotX1;
+ Arg[23] = SlotY0; // Y entries sit 17 indices after the matching X entries.
+ Arg[28] = SlotY1;
+ Arg[40] = SlotZ0; // Z entries sit 34 indices after the matching X entries.
+ Arg[45] = SlotZ1;
+ Arg[57] = SlotW0; // W entries sit 51 indices after the matching X entries.
+ Arg[62] = SlotW1;
+}
+
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
@@ -692,25 +711,52 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
}
case AMDGPUIntrinsic::AMDGPU_dp4: {
- SDValue Args[8] = {
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(0, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(0, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(1, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(1, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(2, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(2, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(3, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(3, MVT::i32))
- };
- return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
+ SDValue Op0 = Op.getOperand(1), Op1 = Op.getOperand(2);
+ SDValue Args[70];
+ getVector2OpArgs(Args, DAG,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1,
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1,
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1,
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op1), MVT::f32, Op1,
+ DAG.getConstant(3, MVT::i32)));
+ MachineSDNode *N = DAG.getMachineNode(AMDGPU::DOT4, DL, MVT::f32, Args);
+ return SDValue(N, 0);
+ }
+ case AMDGPUIntrinsic::AMDGPU_cube: {
+ SDValue Op0 = Op.getOperand(1);
+ SDValue Args[70];
+ getVector2OpArgs(Args, DAG,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, Op0,
+ DAG.getConstant(2, MVT::i32)));
+ MachineSDNode *N = DAG.getMachineNode(AMDGPU::CUBE, DL,
+ MVT::f32, MVT::f32, MVT::f32, MVT::f32, Args);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, SDValue(N, 0),
+ SDValue(N, 1), SDValue(N, 2), SDValue(N, 3));
}
case Intrinsic::r600_read_ngroups_x:
@@ -1854,7 +1900,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
I != E; ++I)
Ops.push_back(*I);
- if (Opcode == AMDGPU::DOT_4) {
+ if (TII->uses4Slots(Opcode)) {
int OperandIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),