diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-11-08 19:29:25 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-11-08 22:02:19 +0000 |
commit | 1fb06f0f5cb6ce924f6aaa3912d011b07537c1c9 (patch) | |
tree | fe42261a625cc657509763ab20afd8af89be1360 | |
parent | 4be03c0e06315d4537a26e21da2e988adcba941c (diff) |
R600: Support for indirect addressing (ref: indirect-wip-2)
-rw-r--r-- | lib/Target/AMDGPU/AMDGPU.h | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 24 | ||||
-rw-r--r-- | lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 5 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 146 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Defines.h | 3 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.cpp | 107 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600InstrInfo.cpp | 71 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600InstrInfo.h | 12 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 59 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600MachineFunctionInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600RegisterInfo.cpp | 13 | ||||
-rw-r--r-- | lib/Target/AMDGPU/R600RegisterInfo.td | 18 |
17 files changed, 464 insertions, 8 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index c722d0455e7..9897e0d6478 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -20,6 +20,7 @@ class FunctionPass; class AMDGPUTargetMachine; // R600 Passes +FunctionPass* createR600AllocateMemoryRegsPass(TargetMachine &tm); FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 57dcaac19a0..730744914a1 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -347,5 +347,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const NODE_NAME_CASE(INTERP) NODE_NAME_CASE(INTERP_P0) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(REGISTER_LOAD) + NODE_NAME_CASE(REGISTER_STORE) } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 58d2287a348..60de190e9a7 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -122,6 +122,8 @@ enum INTERP, INTERP_P0, EXPORT, + REGISTER_LOAD, + REGISTER_STORE, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index dd4b7333000..5210849bf1f 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -115,6 +115,10 @@ bool AMDGPUPassConfig::addPreRegAlloc() { } bool AMDGPUPassConfig::addPostRegAlloc() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + addPass(createR600AllocateMemoryRegsPass(*TM)); + } return false; } diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp index 807113134d2..9a1483b1c75 100644 --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp +++ 
b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp @@ -70,6 +70,7 @@ private: bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -97,6 +98,8 @@ SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } + + bool AMDGPUDAGToDAGISel::SelectADDRParam( SDValue Addr, SDValue& R1, SDValue& R2) { @@ -164,7 +167,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int FI = FIN->getIndex(); EVT OpVT = N->getValueType(0); unsigned int NewOpc = AMDGPU::COPY; - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32); return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); } } @@ -393,3 +396,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, return true; } + +bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *C; + + if ((C = dyn_cast<ConstantSDNode>(Addr))) { + Base = CurDAG->getRegister(AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && + (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + } + + return true; +} diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index fe2032ea0ef..e96552f7fe9 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ 
b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -91,10 +91,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.getImm() != 0) { - O << " + " + Op.getImm(); - } + printIfSet(MI, OpNo, O, "+"); } void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp new file mode 100644 index 00000000000..a5b3c688e9a --- /dev/null +++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp @@ -0,0 +1,146 @@ +//===-- R600AllocateMemoryRegs.cpp - Indirect Adressing Support -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Instruction can use indirect addressing to index the register file as if it +// were memory. In order to make this work correctly we need add all registers +// that might be used for indirect addressing to the LiveIn lists of basic +// blocks and also add them as implicit uses for instructions that do +// indirect reads. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +class R600AllocateMemoryRegsPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + +public: + R600AllocateMemoryRegsPass(TargetMachine &tm) : + MachineFunctionPass(ID), + TII(static_cast<const R600InstrInfo*>(tm.getInstrInfo())) + { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Handle indirect addressing"; } + +}; + +} // End anonymous namespace + +char R600AllocateMemoryRegsPass::ID = 0; + +FunctionPass *llvm::createR600AllocateMemoryRegsPass(TargetMachine &tm) { + return new R600AllocateMemoryRegsPass(tm); +} + +bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) { + + std::vector<unsigned> IndirectRegs = TII->getIndirectReservedRegs(MF); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned IndirectRegOffset = TII->getIndirectIndexBegin(MF); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next) { + Next = llvm::next(I); + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: continue; + case AMDGPU::RegisterStore_i32: + case AMDGPU::RegisterStore_f32: + { + int64_t Offset = (MI.getOperand(2).getImm() * 4) + + MI.getOperand(3).getImm() + + (IndirectRegOffset * 4); + unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + + MFI->IndirectChannels.set(MI.getOperand(3).getImm()); + 
+ if (MI.getOperand(1).getReg() == AMDGPU::ZERO) { + MFI->ReservedRegs.push_back(DstReg); + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg, + MI.getOperand(0).getReg()); + } else { + MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOVA_INT_eg, + AMDGPU::AR_X, + MI.getOperand(1).getReg()); + TII->setImmOperand(MOVA, R600Operands::WRITE, 0); + unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset); + MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, + AMDGPU::StackMOV, OffsetReg, + MI.getOperand(0).getReg()); + MachineInstr *NewMI = MIBuilder.addReg(AMDGPU::AR_X, RegState::Implicit); + TII->setImmOperand(NewMI, R600Operands::DST_REL, 1); + } + break; + } + + case AMDGPU::RegisterLoad_i32: + case AMDGPU::RegisterLoad_f32: + { + unsigned Channel = MI.getOperand(3).getImm(); + unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel + + (IndirectRegOffset * 4); + unsigned OffsetReg; + + if (MI.getOperand(1).getReg() == AMDGPU::ZERO) { + OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset); + TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, + MI.getOperand(0).getReg(), + OffsetReg); + } else { + R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); + MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOVA_INT_eg, + AMDGPU::AR_X, + MI.getOperand(1).getReg()); + TII->setImmOperand(MOVA, R600Operands::WRITE, 0); + OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset); + MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOV, MI.getOperand(0).getReg(), + OffsetReg); + for (std::vector<unsigned>::iterator RRI = MFI->ReservedRegs.begin(), + RRE = MFI->ReservedRegs.end(); + RRE != RRI; ++RRI) { + unsigned Reg = *RRI; + if (GET_REG_CHAN(Reg) == Channel) { + MIBuilder.addReg(Reg, RegState::Implicit); + } + } + MachineInstr *NewMI = MIBuilder.addReg(AMDGPU::AR_X, RegState::Implicit); + TII->setImmOperand(NewMI, R600Operands::SRC0_REL, 1); + } + 
break; + } + } + MI.eraseFromParent(); + } + } + + return false; +} + diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h index 4a0e238b849..108abc4e7bb 100644 --- a/lib/Target/AMDGPU/R600Defines.h +++ b/lib/Target/AMDGPU/R600Defines.h @@ -48,6 +48,9 @@ namespace R600_InstFlag { #define HW_REG_MASK 0x1ff #define HW_CHAN_SHIFT 9 +#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT) +#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK) + namespace R600Operands { enum Ops { DST, diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 75a2a90b31a..884eb304f7b 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -67,6 +67,19 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + // Legalize loads and stores to the private address space. + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); + setOperationAction(ISD::STORE, MVT::i8, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v4f32, Custom); + + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -110,7 +123,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( TII->addFlag(NewMI, 0, MO_FLAG_NEG); break; } - case AMDGPU::R600_LOAD_CONST: { int64_t RegIndex = MI->getOperand(1).getImm(); @@ -319,7 +331,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case 
ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); + case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = @@ -763,6 +778,96 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const return Cond; } +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + if (VT.isVector()) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Loads[4]; + // LLVM generates byte-addresing pointers, but we need to convert this to a + // register index. Each register holds 16 bytes (4 x 32), so in order to + // get the register index, we need to divide the pointer by 16. 
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, MVT::i32)); + + for (unsigned i = 0; i < 4; ++i) { + Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, + Chain, Ptr, + DAG.getTargetConstant(i, MVT::i32), // Channel + Op.getOperand(2)); + } + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4); + } else { + LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, + Chain, Ptr, + DAG.getTargetConstant(0, MVT::i32), // Channel + Op.getOperand(2)); + } + + SDValue Ops[2]; + Ops[0] = LoweredLoad; + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Ptr = Op.getOperand(2); + EVT VT = Value.getValueType(); + R600MachineFunctionInfo *MFI = + DAG.getMachineFunction().getInfo<R600MachineFunctionInfo>(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + if (VT.isVector()) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Stores[4]; + + // LLVM generates byte-addresing pointers, but we need to convert this to a + // register index. Each register holds 16 bytes (4 x 32), so in order to + // get the register index, we need to divide the pointer by 16. 
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, MVT::i32)); + + for (unsigned i = 0; i < 4; ++i) { + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, + Value, DAG.getConstant(i, MVT::i32)); + + Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Elem, Ptr, + DAG.getTargetConstant(i, MVT::i32)); // Channel + MFI->IndirectChannels.set(i); + } + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4); + } else { + if (VT == MVT::i8) { + Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); + } + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, + DAG.getTargetConstant(0, MVT::i32)); // Channel + MFI->IndirectChannels.set(0); + } + + return Chain; +} + SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index d1dfe9fe45b..a2d7934287d 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -60,6 +60,8 @@ private: SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 7c5b19ed3ca..cb232edce6a 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -15,8 +15,12 @@ #include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" +#include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include 
"llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Instructions.h" #include "AMDILUtilityFunctions.h" #define GET_INSTRINFO_CTOR @@ -482,6 +486,73 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } +unsigned R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const +{ + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + unsigned Offset = 0; + + if (MRI.livein_empty() && MFI->ReservedRegs.empty()) { + return 0; + } + + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + LE = MRI.livein_end(); + LI != LE; ++LI) { + Offset = std::max(Offset, + (unsigned)GET_REG_INDEX(RI.getEncodingValue(LI->first))); + } + + for (std::vector<unsigned>::const_iterator RRI = MFI->ReservedRegs.begin(), + RRE = MFI->ReservedRegs.end(); + RRI != RRE; ++RRI) { + Offset = std::max(Offset, + (unsigned GET_REG_INDEX(RI.getEncodingValue(*RRI)))); + } + + return Offset + 1; +} + +unsigned R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const +{ + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Variable sized objects are not supported + assert(!MFI->hasVarSizedObjects()); + + // Only one stack object is supported at the moment + assert(MFI->getNumObjects() <= 1); + + if (MFI->getNumObjects() == 0) { + return 0; + } + unsigned StackObject = MFI->getObjectIndexBegin(); + const AllocaInst *Alloca = MFI->getObjectAllocation(StackObject); + const ConstantInt *Size = dyn_cast<ConstantInt>(Alloca->getArraySize()); + assert(Size); + + return getIndirectIndexBegin(MF) + Size->getZExtValue(); +} + +std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs( + const MachineFunction &MF) const +{ + const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + unsigned End = getIndirectIndexEnd(MF); + + std::vector<unsigned> Regs; + + for (unsigned Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { + unsigned 
SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); + Regs.push_back(SuperReg); + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); + Regs.push_back(Reg); + } + } + return Regs; +} + MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opcode, diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index cec1c3bd38a..dd46a27082e 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -110,6 +110,18 @@ namespace llvm { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const { return 1;} + /// getIndirectIndexBegin - return the smallest register index that will + /// be accessed by an indirect read or write. + unsigned getIndirectIndexBegin(const MachineFunction &MF) const; + + /// getIndirectIndexEnd - return the largest register index that will be + /// accessed by an indirect read or write. + unsigned getIndirectIndexEnd(const MachineFunction &MF) const; + + /// getIndirectReservedRegs - return a list of all the registers that may be + /// accesed using indirect addressing. + std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const; + ///buildDefaultInstruction - This function returns a MachineInstr with /// all the instruction modifiers initialized to their default values. /// You can use this function to avoid manually specifying each instruction diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index c8cf0123db5..d081824f418 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -86,9 +86,14 @@ def UP : InstFlag <"printUpdatePred">; // default to 0. 
def LAST : InstFlag<"printLast", 1>; +def FRAMEri : Operand<iPTR> { + let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); +} + def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; class R600ALU_Word0 { field bits<32> Word0; @@ -417,9 +422,17 @@ def isR600toCayman : Predicate< "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; //===----------------------------------------------------------------------===// -// Interpolation Instructions +// R600 SDNodes //===----------------------------------------------------------------------===// +def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD", + SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayLoad]>; + +def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; + def INTERP: SDNode<"AMDGPUISD::INTERP", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> >; @@ -428,6 +441,10 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> >; +//===----------------------------------------------------------------------===// +// Interpolation Instructions +//===----------------------------------------------------------------------===// + let usesCustomInserter = 1 in { def input_perspective : AMDGPUShaderInst < (outs R600_Reg128:$dst), @@ -1076,6 +1093,10 @@ let Predicates = [isEGorCayman] in { defm DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; +let hasSideEffects = 1 in { + def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>; +} + def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { @@ -1358,6 +1379,42 @@ def CONSTANT_LOAD_eg : 
VTX_READ_32_eg <1, } +//===----------------------------------------------------------------------===// +// Regist loads and stores - for indirect addressing +//===----------------------------------------------------------------------===// + +let isPseudo = 1, isCodeGenOnly =1 in { + +class RegisterLoad <ValueType vt> : InstR600 <0x0, + (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan), + "RegisterLoad $dst, $addr", + [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr, + (i32 timm:$chan)))], + NullALU +>; + +class RegisterStore <ValueType vt> : InstR600 <0x0, + (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan), + "RegisterStore_i32 $val, $addr", + [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))], + NullALU +>; + + +} // End usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly = 1 + +def RegisterLoad_i32 : RegisterLoad<i32>; +def RegisterLoad_f32 : RegisterLoad<f32>; + +def RegisterStore_i32 : RegisterStore<i32>; +def RegisterStore_f32 : RegisterStore<f32>; + +let hasSideEffects = 1 in { + +def StackMOV : R600_1OP <0x19, "MOV", []>; + +} // End hasSideEffects = 1 + let Predicates = [isCayman] in { let isVector = 1 in { diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp index 49e662f3401..8c58330f378 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -13,6 +13,7 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo(), + IndirectChannels(4), HasLinearInterpolation(false), HasPerspectiveInterpolation(false) { diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h index 9f01379caf2..0c6b40d0f94 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -15,6 +15,7 @@ #ifndef R600MACHINEFUNCTIONINFO_H #define R600MACHINEFUNCTIONINFO_H 
+#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include <vector> @@ -27,6 +28,7 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector<unsigned> ReservedRegs; SDNode *Outputs[16]; + BitVector IndirectChannels; bool HasLinearInterpolation; bool HasPerspectiveInterpolation; diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index ef151834807..2d582cb6a26 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -14,6 +14,7 @@ #include "R600RegisterInfo.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" +#include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" using namespace llvm; @@ -48,11 +49,23 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(*I); } + for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), + E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { + Reserved.set(*I); + } + for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), E = MFI->ReservedRegs.end(); I != E; ++I) { Reserved.set(*I); } + const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII); + std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF); + for (std::vector<unsigned>::iterator I = IndirectRegs.begin(), + E = IndirectRegs.end(); + I != E; ++I) { + Reserved.set(*I); + } return Reserved; } diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td index d3d6d25d292..3b218253659 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/lib/Target/AMDGPU/R600RegisterInfo.td @@ -31,6 +31,10 @@ foreach Index = 0-127 in { // 32-bit Constant Registers (There are more than 128, this the number // that is currently supported. 
def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; + + // Indirect addressing offset registers + def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan, + Index, Chan>; } // 128-bit Temporary Registers def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", @@ -46,7 +50,6 @@ foreach Index = 448-464 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; } - // Special Registers def ZERO : R600Reg<"0.0", 248>; @@ -61,10 +64,22 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; +def AR_X : R600Reg<"AR.x", 0>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "ArrayBase%u", 448, 464))>; +let isAllocatable = 0 in { + +def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, + (add (interleave + (interleave (sequence "Addr%u_X", 0, 127), + (sequence "Addr%u_Z", 0, 127)), + (interleave (sequence "Addr%u_Y", 0, 127), + (sequence "Addr%u_W", 0, 127))))>; + +} // End isAllocatable = 0 + def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add (interleave (interleave (sequence "C%u_X", 0, 127), @@ -93,6 +108,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_TReg32, R600_CReg32, R600_ArrayBase, + R600_Addr, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add |