diff options
Diffstat (limited to 'lib/Target/AMDGPU/R600ISelLowering.cpp')
 lib/Target/AMDGPU/R600ISelLowering.cpp | 127 +++++++++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 1 deletion(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 38d68f4d9f8..fb59d1b4d8b 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -67,6 +67,19 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + // Legalize loads and stores to the private address space. + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); + setOperationAction(ISD::STORE, MVT::i8, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v4f32, Custom); + + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -110,7 +123,39 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( TII->addFlag(NewMI, 0, MO_FLAG_NEG); break; } - + case AMDGPU::RegisterLoad_i32: + case AMDGPU::RegisterLoad_f32: + { + unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(MI->getOperand(2).getImm() + MI->getOperand(3).getImm()); + unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::R600_TReg32RegClass); + MachineInstr *LSHR = TII->buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, Tmp, MI->getOperand(1).getReg(), AMDGPU::ALU_LITERAL_X); + LSHR->getOperand(TII->getOperandIdx(*LSHR, R600Operands::IMM)).setImm(2); + MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, + Tmp); + MOVA->getOperand(TII->getOperandIdx(*MOVA, R600Operands::WRITE)).setImm(0); + MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, MI->getOperand(0).getReg(), + OffsetReg); + 
MachineInstr *NewMI = MIBuilder.addReg(AMDGPU::AR_X, RegState::Implicit); + + NewMI->getOperand(TII->getOperandIdx(*NewMI, R600Operands::SRC0_REL)).setImm(1); + break; + } + case AMDGPU::RegisterStore_i32: + case AMDGPU::RegisterStore_f32: + { + R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); + if (MI->getOperand(1).getReg() == AMDGPU::SP) { + int64_t StackIndex = MI->getOperand(2).getImm() + + MI->getOperand(3).getImm(); + unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(StackIndex); + MFI->ReservedRegs.push_back(DstReg); + TII->buildDefaultInstruction(*BB, I, AMDGPU::StackMOV, DstReg, + MI->getOperand(0).getReg()); + } else { + assert(!"Relative dst not supported."); + } + break; + } case AMDGPU::R600_LOAD_CONST: { int64_t RegIndex = MI->getOperand(1).getImm(); @@ -300,7 +345,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); + case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = @@ -697,6 +745,83 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const return Cond; } +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + if (VT.isVector()) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Loads[4]; + + for (unsigned i = 0; i < 4; ++i) { + Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, 
DL, ElemVT, + Chain, Ptr, + DAG.getTargetConstant(i, MVT::i32)); // Channel + } + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4); + } else { + LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, + Chain, Ptr, + DAG.getTargetConstant(0, MVT::i32)); // Channel + } + + SDValue Ops[2]; + Ops[0] = LoweredLoad; + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Ptr = Op.getOperand(2); + EVT VT = Value.getValueType(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + if (VT.isVector()) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Stores[4]; + + // XXX: I'm not sure how to explain this. + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(2, MVT::i32)); + + for (unsigned i = 0; i < 4; ++i) { + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, + Value, DAG.getConstant(i, MVT::i32)); + + Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Elem, Ptr, + DAG.getTargetConstant(i, MVT::i32)); // Channel + } + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4); + } else { + if (VT == MVT::i8) { + Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); + } + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, + DAG.getTargetConstant(0, MVT::i32)); // Channel + } + + return Chain; +} + SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { |