summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/R600ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/R600ISelLowering.cpp')
-rw-r--r--  lib/Target/AMDGPU/R600ISelLowering.cpp  127
1 file changed, 126 insertions(+), 1 deletion(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 38d68f4d9f8..fb59d1b4d8b 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -67,6 +67,19 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
@@ -110,7 +123,39 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
TII->addFlag(NewMI, 0, MO_FLAG_NEG);
break;
}
-
+ case AMDGPU::RegisterLoad_i32:
+ case AMDGPU::RegisterLoad_f32:
+ {
+ unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(MI->getOperand(2).getImm() + MI->getOperand(3).getImm());
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::R600_TReg32RegClass);
+ MachineInstr *LSHR = TII->buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, Tmp, MI->getOperand(1).getReg(), AMDGPU::ALU_LITERAL_X);
+ LSHR->getOperand(TII->getOperandIdx(*LSHR, R600Operands::IMM)).setImm(2);
+ MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X,
+ Tmp);
+ MOVA->getOperand(TII->getOperandIdx(*MOVA, R600Operands::WRITE)).setImm(0);
+ MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, MI->getOperand(0).getReg(),
+ OffsetReg);
+ MachineInstr *NewMI = MIBuilder.addReg(AMDGPU::AR_X, RegState::Implicit);
+
+ NewMI->getOperand(TII->getOperandIdx(*NewMI, R600Operands::SRC0_REL)).setImm(1);
+ break;
+ }
+ case AMDGPU::RegisterStore_i32:
+ case AMDGPU::RegisterStore_f32:
+ {
+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+ if (MI->getOperand(1).getReg() == AMDGPU::SP) {
+ int64_t StackIndex = MI->getOperand(2).getImm() +
+ MI->getOperand(3).getImm();
+ unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(StackIndex);
+ MFI->ReservedRegs.push_back(DstReg);
+ TII->buildDefaultInstruction(*BB, I, AMDGPU::StackMOV, DstReg,
+ MI->getOperand(0).getReg());
+ } else {
+ assert(!"Relative dst not supported.");
+ }
+ break;
+ }
case AMDGPU::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
@@ -300,7 +345,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
+ case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
@@ -697,6 +745,83 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
return Cond;
}
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ if (VT.isVector()) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ for (unsigned i = 0; i < 4; ++i) {
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(i, MVT::i32)); // Channel
+ }
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+// Custom-lower an ISD::STORE to the R600 private address space.
+// Private stores become AMDGPUISD::REGISTER_STORE nodes — one per channel
+// for vector values, a single node for scalars (i8 values are zero-extended
+// to i32 first).  Stores to any other address space return SDValue() so the
+// generic legalizer handles them.
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+ EVT VT = Value.getValueType();
+
+ // Only private-address stores get this custom expansion.
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ if (VT.isVector()) {
+ // NOTE(review): as in LowerLOAD, exactly 4 channels are emitted, so
+ // only 4-wide vector types are handled correctly here.
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Stores[4];
+
+ // XXX: shift the pointer right by 2 (i.e. divide by 4) — presumably
+ // converting a byte offset into a 32-bit-element index for the
+ // per-channel stores.  TODO confirm why the scalar path below does not
+ // apply the same conversion.
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(2, MVT::i32));
+
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(i, MVT::i32)); // Channel
+ }
+ // Join the four independent store chains back into one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4);
+ } else {
+ // i8 stores are widened to i32; only the zero-extended value is kept.
+ if (VT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ // A store's only result is its chain.
+ return Chain;
+}
+
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const
{