summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    Tom Stellard <thomas.stellard@amd.com>  2012-10-23 15:46:11 +0000
committer Tom Stellard <thomas.stellard@amd.com>  2012-10-23 15:46:11 +0000
commit    dd52f021d275f653857542e63297fbafa2ff60bc (patch)
tree      df38ef80fdbc56a392ab15238cbfefb358afa975
parent    e6dba68ec1a6a3ea452809787d25ec21569544e2 (diff)
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h2
-rw-r--r--lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp21
-rw-r--r--lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp5
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp127
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h2
-rw-r--r--lib/Target/AMDGPU/R600Instructions.td59
-rw-r--r--lib/Target/AMDGPU/R600RegisterInfo.td18
8 files changed, 228 insertions, 8 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8021fc473d5..6cf2f9df492 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -346,5 +346,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(INTERP)
NODE_NAME_CASE(INTERP_P0)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
}
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 2d8ed82c117..0ea9cd50208 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -121,6 +121,8 @@ enum
URECIP,
INTERP,
INTERP_P0,
+ REGISTER_LOAD,
+ REGISTER_STORE,
LAST_AMDGPU_ISD_NUMBER
};
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
index 807113134d2..14a33551a32 100644
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -70,6 +70,7 @@ private:
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -97,6 +98,8 @@ SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+
+
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
@@ -164,7 +167,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int FI = FIN->getIndex();
EVT OpVT = N->getValueType(0);
unsigned int NewOpc = AMDGPU::COPY;
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32);
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
}
}
@@ -393,3 +396,19 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
return true;
}
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
+ Base = CurDAG->getRegister(AMDGPU::SP, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else if (Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) {
+ Base = Addr.getOperand(1);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+
+ return true;
+}
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index fe2032ea0ef..e96552f7fe9 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -91,10 +91,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.getImm() != 0) {
- O << " + " + Op.getImm();
- }
+ printIfSet(MI, OpNo, O, "+");
}
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 38d68f4d9f8..fb59d1b4d8b 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -67,6 +67,19 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
@@ -110,7 +123,39 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
TII->addFlag(NewMI, 0, MO_FLAG_NEG);
break;
}
-
+ case AMDGPU::RegisterLoad_i32:
+ case AMDGPU::RegisterLoad_f32:
+ {
+ unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(MI->getOperand(2).getImm() + MI->getOperand(3).getImm());
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::R600_TReg32RegClass);
+ MachineInstr *LSHR = TII->buildDefaultInstruction(*BB, I, AMDGPU::LSHR_eg, Tmp, MI->getOperand(1).getReg(), AMDGPU::ALU_LITERAL_X);
+ LSHR->getOperand(TII->getOperandIdx(*LSHR, R600Operands::IMM)).setImm(2);
+ MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X,
+ Tmp);
+ MOVA->getOperand(TII->getOperandIdx(*MOVA, R600Operands::WRITE)).setImm(0);
+ MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, MI->getOperand(0).getReg(),
+ OffsetReg);
+ MachineInstr *NewMI = MIBuilder.addReg(AMDGPU::AR_X, RegState::Implicit);
+
+ NewMI->getOperand(TII->getOperandIdx(*NewMI, R600Operands::SRC0_REL)).setImm(1);
+ break;
+ }
+ case AMDGPU::RegisterStore_i32:
+ case AMDGPU::RegisterStore_f32:
+ {
+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+ if (MI->getOperand(1).getReg() == AMDGPU::SP) {
+ int64_t StackIndex = MI->getOperand(2).getImm() +
+ MI->getOperand(3).getImm();
+ unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(StackIndex);
+ MFI->ReservedRegs.push_back(DstReg);
+ TII->buildDefaultInstruction(*BB, I, AMDGPU::StackMOV, DstReg,
+ MI->getOperand(0).getReg());
+ } else {
+ assert(!"Relative dst not supported.");
+ }
+ break;
+ }
case AMDGPU::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
@@ -300,7 +345,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
+ case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
@@ -697,6 +745,83 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
return Cond;
}
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ if (VT.isVector()) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ for (unsigned i = 0; i < 4; ++i) {
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(i, MVT::i32)); // Channel
+ }
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+ EVT VT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ if (VT.isVector()) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Stores[4];
+
+  // XXX: SRL by 2 appears to turn the byte offset into a 32-bit register index — confirm.
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(2, MVT::i32));
+
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(i, MVT::i32)); // Channel
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4);
+ } else {
+ if (VT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const
{
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index d1dfe9fe45b..a2d7934287d 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -60,6 +60,8 @@ private:
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 472538ee65a..b26d363cae7 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -86,9 +86,14 @@ def UP : InstFlag <"printUpdatePred">;
// default to 0.
def LAST : InstFlag<"printLast", 1>;
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
@@ -417,9 +422,17 @@ def isR600toCayman : Predicate<
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
//===----------------------------------------------------------------------===//
-// Interpolation Instructions
+// R600 SDNodes
//===----------------------------------------------------------------------===//
+def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
+
def INTERP: SDNode<"AMDGPUISD::INTERP",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
>;
@@ -428,6 +441,10 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
>;
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
let usesCustomInserter = 1 in {
def input_perspective : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
@@ -994,6 +1011,10 @@ let Predicates = [isEGorCayman] in {
defm DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
@@ -1264,6 +1285,42 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
}
+//===----------------------------------------------------------------------===//
+// Frame loads and stores
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly = 1 in {
+
+class RegisterLoad <ValueType vt> : InstR600 <0x0,
+ (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
+ (i32 timm:$chan)))],
+ NullALU
+>;
+
+class RegisterStore <ValueType vt> : InstR600 <0x0,
+ (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
+  "RegisterStore $val, $addr",
+ [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))],
+ NullALU
+>;
+
+
+} // End usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly = 1
+
+def RegisterLoad_i32 : RegisterLoad<i32>;
+def RegisterLoad_f32 : RegisterLoad<f32>;
+
+def RegisterStore_i32 : RegisterStore<i32>;
+def RegisterStore_f32 : RegisterStore<f32>;
+
+let hasSideEffects = 1 in {
+
+def StackMOV : R600_1OP <0x19, "MOV", []>;
+
+} // End hasSideEffects = 1
+
let Predicates = [isCayman] in {
let isVector = 1 in {
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index d3d6d25d292..3b218253659 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -31,6 +31,10 @@ foreach Index = 0-127 in {
// 32-bit Constant Registers (There are more than 128, this the number
// that is currently supported.
def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
+
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -46,7 +50,6 @@ foreach Index = 448-464 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
}
-
// Special Registers
def ZERO : R600Reg<"0.0", 248>;
@@ -61,10 +64,22 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 464))>;
+let isAllocatable = 0 in {
+
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127,
+ (add (interleave
+ (interleave (sequence "Addr%u_X", 0, 127),
+ (sequence "Addr%u_Z", 0, 127)),
+ (interleave (sequence "Addr%u_Y", 0, 127),
+ (sequence "Addr%u_W", 0, 127))))>;
+
+} // End isAllocatable = 0
+
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (interleave
(interleave (sequence "C%u_X", 0, 127),
@@ -93,6 +108,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
R600_ArrayBase,
+ R600_Addr,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add