summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-09-30 17:21:06 -0700
committerTom Stellard <thomas.stellard@amd.com>2013-10-10 11:25:06 -0700
commitcf5d9a2ac13e1270bf120ac20be4f2e23af6df65 (patch)
tree7f9e08d8b548587c36a7416666adbfdb3d20b1a7
parent79249ee208bf9e27e235c295c3f9d00dbba22a3f (diff)
R600/SI: Add support for private address space load/storer600-private-mem-fixes
Private address space is emulated using the register file with MOVRELS and MOVRELD instructions.
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp34
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp21
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h1
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp78
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h16
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td4
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h4
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp15
-rw-r--r--lib/Target/R600/R600ISelLowering.h1
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp52
-rw-r--r--lib/Target/R600/R600InstrInfo.h11
-rw-r--r--lib/Target/R600/R600Instructions.td1
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp16
-rw-r--r--lib/Target/R600/R600RegisterInfo.h2
-rw-r--r--lib/Target/R600/SIISelLowering.cpp106
-rw-r--r--lib/Target/R600/SIISelLowering.h2
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp2
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp66
-rw-r--r--lib/Target/R600/SIInstrInfo.h18
-rw-r--r--lib/Target/R600/SIInstrInfo.td4
-rw-r--r--lib/Target/R600/SIInstructions.td40
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp13
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp8
-rw-r--r--lib/Target/R600/SIRegisterInfo.h2
-rw-r--r--test/CodeGen/R600/private-memory.ll (renamed from test/CodeGen/R600/indirect-addressing.ll)37
-rw-r--r--test/CodeGen/R600/sra.ll2
26 files changed, 419 insertions, 137 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 88b375b0218..a9891350e57 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -309,6 +309,40 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
+ case AMDGPUISD::REGISTER_LOAD: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+
+ SelectADDRIndirect(N->getOperand(1), Addr, Offset);
+ const SDValue Ops[] = {
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
+ CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ Ops);
+ }
+ case AMDGPUISD::REGISTER_STORE: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+ SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ const SDValue Ops[] = {
+ N->getOperand(1),
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ CurDAG->getVTList(MVT::Other),
+ Ops);
+ }
}
return SelectCode(N);
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index f6c074a4cd5..f8f0596c5a0 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
@@ -227,8 +228,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
// AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
@@ -302,6 +303,21 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
&Args[0], Args.size());
}
+SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ Op.getValueType());
+}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
@@ -539,7 +555,8 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
StoreSDNode *Store = cast<StoreSDNode>(Op);
- if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
return SplitVectorStore(Op, DAG);
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 8a68356c2e5..43f6389fac7 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -28,6 +28,7 @@ private:
void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Args,
unsigned Start, unsigned Count) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 434c91a5231..333a8c1d348 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -120,31 +120,43 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
MachineBasicBlock *MBB = MI->getParent();
+ int OffsetOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
+ // addr is a custom operand with multiple MI operands, and only the
+ // first MI operand is given a name.
+ int RegOpIdx = OffsetOpIdx + 1;
+ int ChanOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
switch(MI->getOpcode()) {
default:
if (isRegisterLoad(*MI)) {
- unsigned RegIndex = MI->getOperand(2).getImm();
- unsigned Channel = MI->getOperand(3).getImm();
+ int DstOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI->getOperand(1).getReg();
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
- buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
+ buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
getIndirectAddrRegClass()->getRegister(Address));
} else {
- buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
+ buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
Address, OffsetReg);
}
} else if (isRegisterStore(*MI)) {
- unsigned RegIndex = MI->getOperand(2).getImm();
- unsigned Channel = MI->getOperand(3).getImm();
+ int ValOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI->getOperand(1).getReg();
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
- MI->getOperand(0).getReg());
+ MI->getOperand(ValOpIdx).getReg());
} else {
- buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
+ buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
calculateIndirectAddress(RegIndex, Channel),
OffsetReg);
}
@@ -263,6 +275,52 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
}
+int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo*>(
+ MF.getTarget().getRegisterInfo());
+ int Offset = -1;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ unsigned Reg = LI->first;
+ if (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ !IndirectRC->contains(Reg))
+ continue;
+
+ Offset = std::max(Offset, (int)TRI->getHWRegIndex(Reg));
+ }
+
+ return Offset + 1;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const {
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index dc65d4e75f7..6378fdd1eb4 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -99,6 +99,14 @@ protected:
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const;
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
@@ -144,14 +152,6 @@ public:
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
- /// \returns the smallest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
-
- /// \returns the largest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
-
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
/// \p Channel
///
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 5778a8c2b23..33ad5ec5a37 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -35,6 +35,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
}
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
def COND_EQ : PatLeaf <
(cond),
@@ -277,6 +278,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
ComplexPattern addrPat> {
+let UseNamedOperandTable = 1 in {
+
def RegisterLoad : AMDGPUShaderInst <
(outs dstClass:$dst),
(ins addrClass:$addr, i32imm:$chan),
@@ -295,6 +298,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
let isRegisterStore = 1;
}
}
+}
} // End isCodeGenOnly = 1, isPseudo = 1
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 135d3dd0207..688e1a02c12 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -50,6 +50,10 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
assert(!"Unimplemented"); return NULL;
}
+ virtual unsigned getHWRegIndex(unsigned Reg) const {
+ assert(!"Unimplemented"); return 0;
+ }
+
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 62577eabf99..f231d47bd20 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -513,7 +513,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -820,20 +819,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
false, false, false, 0);
}
-SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
-
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
-
- unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 6457ad42ea6..c10257eeada 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -59,7 +59,6 @@ private:
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index a11d54a9f7d..aff11ce1b34 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -1024,67 +1024,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
-int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = 0;
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- if (MRI.livein_empty()) {
- return 0;
- }
-
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- LE = MRI.livein_end();
- LI != LE; ++LI) {
- Offset = std::max(Offset,
- GET_REG_INDEX(RI.getEncodingValue(LI->first)));
- }
-
- return Offset + 1;
-}
-
-int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Variable sized objects are not supported
- assert(!MFI->hasVarSizedObjects());
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
-
- return getIndirectIndexBegin(MF) + Offset;
-}
-
-std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
- std::vector<unsigned> Regs;
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
- if (End == -1) {
- return Regs;
- }
+ if (End == -1)
+ return;
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
- Regs.push_back(SuperReg);
+ Reserved.set(SuperReg);
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
- Regs.push_back(Reg);
+ Reserved.set(Reg);
}
}
- return Regs;
}
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index d7438ef2771..e2996c7a78f 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -193,14 +193,9 @@ namespace llvm {
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
- /// \returns a list of all the registers that may be accesed using indirect
- /// addressing.
- std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
-
+ /// \brief Reserve the registers that may be accesed using indirect addressing.
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index f0c061ec9d6..d940d45a4ac 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
-def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index dd8f3ef9814..fbe333d2038 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
Reserved.set(AMDGPU::ONE);
@@ -48,14 +50,8 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*I);
}
- const R600InstrInfo *RII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
- std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
- for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
- E = IndirectRegs.end();
- I != E; ++I) {
- Reserved.set(*I);
- }
+ TII->reserveIndirectRegisters(Reserved, MF);
+
return Reserved;
}
@@ -73,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
+unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return GET_REG_INDEX(getEncodingValue(Reg));
+}
+
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index d458e557a4e..8833ee77e04 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -39,6 +39,8 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index fb21f6e860c..89b3c7c7674 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -70,6 +70,19 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ // We need to custom lower loads/stores from private memory
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i128, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -89,12 +102,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -111,6 +126,8 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *IsFast) const {
// XXX: This depends on the address space and also we may want to revist
// the alignment values we specify in the DataLayout.
+ if (!VT.isSimple() || VT == MVT::Other)
+ return false;
return VT.bitsGT(MVT::i32);
}
@@ -332,6 +349,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
+ case AMDGPU::SI_RegisterStorePseudo: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
+ Reg);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ MI->eraseFromParent();
+ }
}
return BB;
}
@@ -377,7 +407,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
- if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Op.getValueType().isVector()) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
@@ -385,11 +416,12 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
};
return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
} else {
- return SDValue();
+ return LowerLOAD(Op, DAG);
}
}
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
@@ -600,6 +632,30 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+
+ if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
+ SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ Load->getChain(), Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+ SDValue MergedValues[2] = {
+ Ret,
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+
+}
+
SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
SelectionDAG &DAG) const {
@@ -657,6 +713,52 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
}
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode())
+ return Ret;
+
+ if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
+ SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue Chain = Store->getChain();
+ SmallVector<SDValue, 8> Values;
+
+ if (VT == MVT::i64) {
+ for (unsigned i = 0; i < 2; ++i) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)));
+ }
+ } else if (VT == MVT::i128) {
+ for (unsigned i = 0; i < 2; ++i) {
+ for (unsigned j = 0; j < 2; ++j) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)),
+ DAG.getConstant(j, MVT::i32)));
+ }
+ }
+ } else {
+ Values.push_back(Store->getValue());
+ }
+
+ for (unsigned i = 0; i < Values.size(); ++i) {
+ SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
+ Ptr, DAG.getConstant(i, MVT::i32));
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Values[i], PartPtr,
+ DAG.getTargetConstant(0, MVT::i32));
+ }
+ return Chain;
+}
+
+
SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 9c54a6f48aa..ecfea15e612 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -25,8 +25,10 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue Chain, unsigned Offset) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 7e42fb777d4..7ef662eb65b 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -186,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
- if (!Op.isReg())
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return std::make_pair(0, 0);
unsigned Reg = Op.getReg();
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4640d80fdfd..15ba7d4b25b 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -226,7 +226,8 @@ MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg,
unsigned SrcReg) const {
- assert(!"Not Implemented");
+ return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
+ DstReg) .addReg(SrcReg);
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
@@ -595,17 +596,8 @@ unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
return RegIndex;
}
-
-int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
-}
-
-int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
-}
-
const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
- llvm_unreachable("Unimplemented");
+ return &AMDGPU::VReg_32RegClass;
}
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
@@ -613,7 +605,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
+ .addReg(IndirectBaseReg, RegState::Define)
+ .addOperand(I->getOperand(0))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0)
+ .addReg(ValueReg);
}
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
@@ -621,5 +623,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ .addOperand(I->getOperand(0))
+ .addOperand(I->getOperand(1))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0);
+
+}
+
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ int End = getIndirectIndexEnd(MF);
+ int Begin = getIndirectIndexBegin(MF);
+
+ if (End == -1)
+ return;
+
+
+ for (int Index = Begin; Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Index - 1); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Index - 2); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Index - 3); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Index - 7); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Index - 15); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 31ceaf35f54..8bc53e65526 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
+ MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned OffsetVGPR,
+ unsigned MovRelOp,
+ unsigned Dst,
+ unsigned Src0) const;
+ // If you add or remove instructions from this function, you will
+
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -58,9 +66,6 @@ public:
virtual bool verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const;
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
bool isSALUInstr(const MachineInstr &MI) const;
unsigned getVALUOp(const MachineInstr &MI) const;
@@ -115,7 +120,12 @@ public:
unsigned ValueReg,
unsigned Address,
unsigned OffsetReg) const;
- };
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
+
+ void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
+ unsigned SavReg, unsigned IndexReg) const;
+};
namespace AMDGPU {
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index b55f59d1618..4cd0daa55c5 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -121,6 +121,10 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
return false;
}]>;
+def FRAMEri64 : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
+}
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index a8b1d53deda..05af6e91b06 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1286,6 +1286,36 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
+
+let UseNamedOperandTable = 1 in {
+
+def SI_RegisterLoad : AMDGPUShaderInst <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterLoad = 1;
+ let mayLoad = 1;
+}
+
+class SIRegStore<dag outs> : AMDGPUShaderInst <
+ outs,
+ (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterStore = 1;
+ let mayStore = 1;
+}
+
+let usesCustomInserter = 1 in {
+def SI_RegisterStorePseudo : SIRegStore<(outs)>;
+} // End usesCustomInserter = 1
+def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
+
+
+} // End UseNamedOperandTable = 1
+
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
@@ -1302,6 +1332,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
let Constraints = "$src = $dst";
}
+def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
@@ -1978,6 +2009,15 @@ def : Pat<
//============================================================================//
// Misc patterns
//============================================================================//
+def : Pat <
+ (i32 (trunc i64:$a)),
+ (EXTRACT_SUBREG $a, sub0)
+>;
+
+def : Pat <
+ (i32 (trunc i64:$a)),
+ (EXTRACT_SUBREG $a, sub0)
+>;
def : Pat <
(or i64:$a, i64:$b),
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index a6c43bbb2c5..958763dffc2 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
+ unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Vec;
- MachineInstr *MovRel =
+ MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(SubReg + Off)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
@@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
+ unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Dst;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(SubReg + Off, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
@@ -477,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectSrc(MI);
break;
+ case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index e06a02257fe..0bbad09cf15 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIInstrInfo.h"
using namespace llvm;
@@ -26,6 +27,9 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::EXEC);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ TII->reserveIndirectRegisters(Reserved, MF);
return Reserved;
}
@@ -51,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
}
}
+unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg);
+}
+
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index ba831b0f77b..8148f7fa476 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -42,6 +42,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/private-memory.ll
index 1ef6c358921..48a013c8e54 100644
--- a/test/CodeGen/R600/indirect-addressing.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
; This test checks that uses and defs of the AR register happen in the same
; instruction clause.
-; CHECK: @mova_same_clause
-; CHECK: MOVA_INT
-; CHECK-NOT: ALU clause
-; CHECK: 0 + AR.x
-; CHECK: MOVA_INT
-; CHECK-NOT: ALU clause
-; CHECK: 0 + AR.x
+; R600-CHECK-LABEL: @mova_same_clause
+; R600-CHECK: MOVA_INT
+; R600-CHECK-NOT: ALU clause
+; R600-CHECK: 0 + AR.x
+; R600-CHECK: MOVA_INT
+; R600-CHECK-NOT: ALU clause
+; R600-CHECK: 0 + AR.x
+; SI-CHECK-LABEL: @mova_same_clause
+; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_MOVRELD
+; SI-CHECK: S_CBRANCH
+; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_MOVRELD
+; SI-CHECK: S_CBRANCH
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
@@ -38,9 +46,10 @@ entry:
; XXX: This generated code has unnecessary MOVs, we should be able to optimize
; this.
-; CHECK: @multiple_structs
-; CHECK-NOT: MOVA_INT
-
+; R600-CHECK-LABEL: @multiple_structs
+; R600-CHECK-NOT: MOVA_INT
+; SI-CHECK-LABEL: @multiple_structs
+; SI-CHECK-NOT: V_MOVREL
%struct.point = type { i32, i32 }
define void @multiple_structs(i32 addrspace(1)* %out) {
@@ -68,8 +77,10 @@ entry:
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.
-; CHECK: @direct_loop
-; CHECK-NOT: MOVA_INT
+; R600-CHECK-LABLE: @direct_loop
+; R600-CHECK-NOT: MOVA_INT
+; SI-CHECK-LABEL: @direct_loop
+; SI-CHECK-NOT: V_MOVREL
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 3ede6a5d39d..01728f7742c 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -43,7 +43,7 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
;EG-CHECK: ASHR
;SI-CHECK-LABEL: @ashr_i64
-;SI-CHECK: V_ASHR_I64
+;SI-CHECK: S_ASHR_I64 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR{{[0-9]}}_SGPR{{[0-9]}}, 8
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64