From 6ae09192a7cab82d7569fdb3696fb61cfd6a639e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 1 Mar 2013 17:09:28 -0500 Subject: XXX: It works --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 12 +++++ lib/Target/R600/SIDefines.h | 97 +++++++++++++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 48 +++++++++++------ lib/Target/R600/SIInstrFormats.td | 2 +- lib/Target/R600/SIInstrInfo.td | 23 +++++---- lib/Target/R600/SIInstructions.td | 2 +- 6 files changed, 155 insertions(+), 29 deletions(-) create mode 100644 lib/Target/R600/SIDefines.h diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 2e18bad467b..f6702ae9524 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -161,6 +161,18 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; + case ISD::BUILD_PAIR: { + if (N->getValueType(0) != MVT::i128) { + llvm_unreachable("Unhandled value type for BUILD_PAIR"); + } + + SDValue RegClass = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); + SDValue SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); + const SDValue Ops[] = { RegClass, N->getOperand(0), SubReg0, N->getOperand(1), SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, N->getDebugLoc(), N->getValueType(0), Ops, 5); + } + case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget(); diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h new file mode 100644 index 00000000000..88d6e934731 --- /dev/null +++ b/lib/Target/R600/SIDefines.h @@ -0,0 +1,97 @@ +#define R_008F0C_SQ_BUF_RSRC_WORD3 0x008F0C +#define S_008F0C_DST_SEL_X(x) (((x) & 0x07) << 0) +#define G_008F0C_DST_SEL_X(x) (((x) >> 0) & 0x07) +#define C_008F0C_DST_SEL_X 0xFFFFFFF8 +#define V_008F0C_SQ_SEL_0 0x00 +#define V_008F0C_SQ_SEL_1 0x01 +#define 
V_008F0C_SQ_SEL_RESERVED_0 0x02 +#define V_008F0C_SQ_SEL_RESERVED_1 0x03 +#define V_008F0C_SQ_SEL_X 0x04 +#define V_008F0C_SQ_SEL_Y 0x05 +#define V_008F0C_SQ_SEL_Z 0x06 +#define V_008F0C_SQ_SEL_W 0x07 +#define S_008F0C_DST_SEL_Y(x) (((x) & 0x07) << 3) +#define G_008F0C_DST_SEL_Y(x) (((x) >> 3) & 0x07) +#define C_008F0C_DST_SEL_Y 0xFFFFFFC7 +#define V_008F0C_SQ_SEL_0 0x00 +#define V_008F0C_SQ_SEL_1 0x01 +#define V_008F0C_SQ_SEL_RESERVED_0 0x02 +#define V_008F0C_SQ_SEL_RESERVED_1 0x03 +#define V_008F0C_SQ_SEL_X 0x04 +#define V_008F0C_SQ_SEL_Y 0x05 +#define V_008F0C_SQ_SEL_Z 0x06 +#define V_008F0C_SQ_SEL_W 0x07 +#define S_008F0C_DST_SEL_Z(x) (((x) & 0x07) << 6) +#define G_008F0C_DST_SEL_Z(x) (((x) >> 6) & 0x07) +#define C_008F0C_DST_SEL_Z 0xFFFFFE3F +#define V_008F0C_SQ_SEL_0 0x00 +#define V_008F0C_SQ_SEL_1 0x01 +#define V_008F0C_SQ_SEL_RESERVED_0 0x02 +#define V_008F0C_SQ_SEL_RESERVED_1 0x03 +#define V_008F0C_SQ_SEL_X 0x04 +#define V_008F0C_SQ_SEL_Y 0x05 +#define V_008F0C_SQ_SEL_Z 0x06 +#define V_008F0C_SQ_SEL_W 0x07 +#define S_008F0C_DST_SEL_W(x) (((x) & 0x07) << 9) +#define G_008F0C_DST_SEL_W(x) (((x) >> 9) & 0x07) +#define C_008F0C_DST_SEL_W 0xFFFFF1FF +#define V_008F0C_SQ_SEL_0 0x00 +#define V_008F0C_SQ_SEL_1 0x01 +#define V_008F0C_SQ_SEL_RESERVED_0 0x02 +#define V_008F0C_SQ_SEL_RESERVED_1 0x03 +#define V_008F0C_SQ_SEL_X 0x04 +#define V_008F0C_SQ_SEL_Y 0x05 +#define V_008F0C_SQ_SEL_Z 0x06 +#define V_008F0C_SQ_SEL_W 0x07 +#define S_008F0C_NUM_FORMAT(x) (((x) & 0x07) << 12) +#define G_008F0C_NUM_FORMAT(x) (((x) >> 12) & 0x07) +#define C_008F0C_NUM_FORMAT 0xFFFF8FFF +#define V_008F0C_BUF_NUM_FORMAT_UNORM 0x00 +#define V_008F0C_BUF_NUM_FORMAT_SNORM 0x01 +#define V_008F0C_BUF_NUM_FORMAT_USCALED 0x02 +#define V_008F0C_BUF_NUM_FORMAT_SSCALED 0x03 +#define V_008F0C_BUF_NUM_FORMAT_UINT 0x04 +#define V_008F0C_BUF_NUM_FORMAT_SINT 0x05 +#define V_008F0C_BUF_NUM_FORMAT_SNORM_OGL 0x06 +#define V_008F0C_BUF_NUM_FORMAT_FLOAT 0x07 +#define S_008F0C_DATA_FORMAT(x) (((x) & 0x0F) << 
15) +#define G_008F0C_DATA_FORMAT(x) (((x) >> 15) & 0x0F) +#define C_008F0C_DATA_FORMAT 0xFFF87FFF +#define V_008F0C_BUF_DATA_FORMAT_INVALID 0x00 +#define V_008F0C_BUF_DATA_FORMAT_8 0x01 +#define V_008F0C_BUF_DATA_FORMAT_16 0x02 +#define V_008F0C_BUF_DATA_FORMAT_8_8 0x03 +#define V_008F0C_BUF_DATA_FORMAT_32 0x04 +#define V_008F0C_BUF_DATA_FORMAT_16_16 0x05 +#define V_008F0C_BUF_DATA_FORMAT_10_11_11 0x06 +#define V_008F0C_BUF_DATA_FORMAT_11_11_10 0x07 +#define V_008F0C_BUF_DATA_FORMAT_10_10_10_2 0x08 +#define V_008F0C_BUF_DATA_FORMAT_2_10_10_10 0x09 +#define V_008F0C_BUF_DATA_FORMAT_8_8_8_8 0x0A +#define V_008F0C_BUF_DATA_FORMAT_32_32 0x0B +#define V_008F0C_BUF_DATA_FORMAT_16_16_16_16 0x0C +#define V_008F0C_BUF_DATA_FORMAT_32_32_32 0x0D +#define V_008F0C_BUF_DATA_FORMAT_32_32_32_32 0x0E +#define V_008F0C_BUF_DATA_FORMAT_RESERVED_15 0x0F +#define S_008F0C_ELEMENT_SIZE(x) (((x) & 0x03) << 19) +#define G_008F0C_ELEMENT_SIZE(x) (((x) >> 19) & 0x03) +#define C_008F0C_ELEMENT_SIZE 0xFFE7FFFF +#define S_008F0C_INDEX_STRIDE(x) (((x) & 0x03) << 21) +#define G_008F0C_INDEX_STRIDE(x) (((x) >> 21) & 0x03) +#define C_008F0C_INDEX_STRIDE 0xFF9FFFFF +#define S_008F0C_ADD_TID_ENABLE(x) (((x) & 0x1) << 23) +#define G_008F0C_ADD_TID_ENABLE(x) (((x) >> 23) & 0x1) +#define C_008F0C_ADD_TID_ENABLE 0xFF7FFFFF +#define S_008F0C_HASH_ENABLE(x) (((x) & 0x1) << 25) +#define G_008F0C_HASH_ENABLE(x) (((x) >> 25) & 0x1) +#define C_008F0C_HASH_ENABLE 0xFDFFFFFF +#define S_008F0C_HEAP(x) (((x) & 0x1) << 26) +#define G_008F0C_HEAP(x) (((x) >> 26) & 0x1) +#define C_008F0C_HEAP 0xFBFFFFFF +#define S_008F0C_TYPE(x) (((x) & 0x03) << 30) +#define G_008F0C_TYPE(x) (((x) >> 30) & 0x03) +#define C_008F0C_TYPE 0x3FFFFFFF +#define V_008F0C_SQ_RSRC_BUF 0x00 +#define V_008F0C_SQ_RSRC_BUF_RSVD_1 0x01 +#define V_008F0C_SQ_RSRC_BUF_RSVD_2 0x02 +#define V_008F0C_SQ_RSRC_BUF_RSVD_3 0x03 diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 06e8b725da0..b59abc0b439 100644 --- 
a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -15,10 +15,11 @@ #include "SIISelLowering.h" #include "AMDIL.h" #include "AMDILIntrinsicInfo.h" +#include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "llvm/Argument.h" +#include "llvm/IR/Argument.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -313,27 +314,42 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // The T# is composed of a 48-bit virtual address plus 80-bits of information // about the buffer. These 80-bits of information encode things like buffer // stride, buffer size, buffer swizzle as well as some configuration bits. - // For simple loads and stores to memory, the only relevant part of the T# is - // the virtual address. The rest of the bits can be set to zero. - VirtualAddress = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, VirtualAddress); + // Set NumRecords to the maximum allowed value. This way we don't need to + // pass the number of records to the program and it should work for all + // buffer sizes. + // + // XXX: Is there any disadvantage to doing this? + // + uint64_t RsrcWord2 = 0xFFFFFFFF; //NUM_RECORDS + + unsigned DataFormat; + switch (VT.getSizeInBits()) { + case 32: DataFormat = V_008F0C_BUF_DATA_FORMAT_32; break; + case 64: DataFormat = V_008F0C_BUF_DATA_FORMAT_32_32; break; + default: llvm_unreachable("Unhandle type size in store"); + } - SDValue Reg0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - VirtualAddress, DAG.getConstant(0, MVT::i32)); - SDValue Reg1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - VirtualAddress, DAG.getConstant(1, MVT::i32)); - SDValue Reg2 = DAG.getRegister(AMDGPU::SREG_LIT_0, MVT::i32); - SDValue Reg3 = DAG.getRegister(AMDGPU::SREG_LIT_0, MVT::i32); + uint64_t RsrcWord3 = + // Always set the destination select values to XYZW.
+ // XXX: Is there any disadvantage to doing this? + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + // NUM_FORMAT_UINT appears to work even if the storage type is float, + // so we'll use it for everything. + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | + S_008F0C_DATA_FORMAT(DataFormat); - SDValue TNum = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Reg0, Reg1, - Reg2, Reg3); - TNum = DAG.getNode(ISD::BITCAST, DL, MVT::i128, TNum); + SDValue RsrcWord23 = DAG.getConstant(RsrcWord2 | (RsrcWord3 << 32), MVT::i64); - SDValue Vaddr = DAG.getConstant(0, MVT::i64); + SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, + VirtualAddress, RsrcWord23); SDValue Ops[2]; - Ops[0] = DAG.getNode(SIISD::BUFFER_STORE, DL, MVT::Other, Chain, - Value, TNum, Vaddr); + Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain, + Value, SrcSrc, DAG.getConstant(0, MVT::i32)); Ops[1] = Chain; return DAG.getMergeValues(Ops, 2, DL); diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 916435cbc21..6c5f93292cf 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -287,7 +287,7 @@ class MUBUF op, dag outs, dag ins, string asm, list pattern> : // These 4 fields are matched based on order in some instructions. Do not // reorder them. 
bits<8> vdata; - bits<7> srsrc; + bits<8> srsrc; bits<12> offset; bits<8> vaddr; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ddc9964f129..fd9d989a758 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -26,7 +26,7 @@ def HI32 : SDNodeXFormgetTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; -def SIbuffer_store : SDNode<"SIISD::BUFFER_STORE", +def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE", SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayStore]>; @@ -57,7 +57,7 @@ def SIOperand { int ZERO = 0x80; } -class GPR4Align : Operand { +class GPR4Align : Operand { let EncoderMethod = "GPR4AlignEncode"; let MIOperandInfo = (ops rc:$reg); } @@ -70,14 +70,14 @@ class GPR2Align : Operand { def BufPtr : Operand { // Changing the operand order for this instruction will break the code emitter, // so do not reorder them. - let MIOperandInfo = (ops SReg_128:$srsrc, i16imm:$offset); + let MIOperandInfo = (ops SReg_64:$srsrc, i16imm:$offset); } //===----------------------------------------------------------------------===// // Complex patterns //===----------------------------------------------------------------------===// -def ADDR_MBUF : ComplexPattern; +def ADDR_MBUF : ComplexPattern; include "SIInstrFormats.td" @@ -297,19 +297,20 @@ class MUBUF_LoadFormat op, string name, RegisterClass regClass, // Changing the operand order for this instruction will break the code emitter, // so do not reorder them. 
class MUBUF_StoreBuffer op, string name, RegisterClass vdataClass> : - MUBUF { + MUBUF { let mayLoad = 0; let mayStore = 1; // Encoding - let offen = 0; - let idxen = 1; + let offset = 0; + let offen = 1; + let idxen = 0; let glc = 0; - let addr64 = 1; + let addr64 = 0; let lds = 0; let slc = 0; let tfe = 0; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0f1c7f52923..bd47bacf1bc 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -942,7 +942,7 @@ def : Pat < def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set (i64 SReg_64:$dst), (and SReg_64:$src0, SReg_64:$src1))] + [(set (i64 SReg_64:$dst), (and SSrc_64:$src0, SSrc_64:$src1))] >; def : Pat < -- cgit v1.2.3