summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp12
-rw-r--r--lib/Target/R600/SIDefines.h97
-rw-r--r--lib/Target/R600/SIISelLowering.cpp48
-rw-r--r--lib/Target/R600/SIInstrFormats.td2
-rw-r--r--lib/Target/R600/SIInstrInfo.td23
-rw-r--r--lib/Target/R600/SIInstructions.td2
6 files changed, 155 insertions, 29 deletions
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 2e18bad467b..f6702ae9524 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -161,6 +161,18 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
+ case ISD::BUILD_PAIR: {
+ if (N->getValueType(0) != MVT::i128) {
+ llvm_unreachable("Unhandled value type for BUILD_PAIR");
+ }
+
+ SDValue RegClass = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+ const SDValue Ops[] = { RegClass, N->getOperand(0), SubReg0, N->getOperand(1), SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, N->getDebugLoc(), N->getValueType(0), Ops, 5);
+ }
+
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
new file mode 100644
index 00000000000..88d6e934731
--- /dev/null
+++ b/lib/Target/R600/SIDefines.h
@@ -0,0 +1,97 @@
+#define R_008F0C_SQ_BUF_RSRC_WORD3 0x008F0C
+#define S_008F0C_DST_SEL_X(x) (((x) & 0x07) << 0)
+#define G_008F0C_DST_SEL_X(x) (((x) >> 0) & 0x07)
+#define C_008F0C_DST_SEL_X 0xFFFFFFF8
+#define V_008F0C_SQ_SEL_0 0x00
+#define V_008F0C_SQ_SEL_1 0x01
+#define V_008F0C_SQ_SEL_RESERVED_0 0x02
+#define V_008F0C_SQ_SEL_RESERVED_1 0x03
+#define V_008F0C_SQ_SEL_X 0x04
+#define V_008F0C_SQ_SEL_Y 0x05
+#define V_008F0C_SQ_SEL_Z 0x06
+#define V_008F0C_SQ_SEL_W 0x07
+#define S_008F0C_DST_SEL_Y(x) (((x) & 0x07) << 3)
+#define G_008F0C_DST_SEL_Y(x) (((x) >> 3) & 0x07)
+#define C_008F0C_DST_SEL_Y 0xFFFFFFC7
+#define V_008F0C_SQ_SEL_0 0x00
+#define V_008F0C_SQ_SEL_1 0x01
+#define V_008F0C_SQ_SEL_RESERVED_0 0x02
+#define V_008F0C_SQ_SEL_RESERVED_1 0x03
+#define V_008F0C_SQ_SEL_X 0x04
+#define V_008F0C_SQ_SEL_Y 0x05
+#define V_008F0C_SQ_SEL_Z 0x06
+#define V_008F0C_SQ_SEL_W 0x07
+#define S_008F0C_DST_SEL_Z(x) (((x) & 0x07) << 6)
+#define G_008F0C_DST_SEL_Z(x) (((x) >> 6) & 0x07)
+#define C_008F0C_DST_SEL_Z 0xFFFFFE3F
+#define V_008F0C_SQ_SEL_0 0x00
+#define V_008F0C_SQ_SEL_1 0x01
+#define V_008F0C_SQ_SEL_RESERVED_0 0x02
+#define V_008F0C_SQ_SEL_RESERVED_1 0x03
+#define V_008F0C_SQ_SEL_X 0x04
+#define V_008F0C_SQ_SEL_Y 0x05
+#define V_008F0C_SQ_SEL_Z 0x06
+#define V_008F0C_SQ_SEL_W 0x07
+#define S_008F0C_DST_SEL_W(x) (((x) & 0x07) << 9)
+#define G_008F0C_DST_SEL_W(x) (((x) >> 9) & 0x07)
+#define C_008F0C_DST_SEL_W 0xFFFFF1FF
+#define V_008F0C_SQ_SEL_0 0x00
+#define V_008F0C_SQ_SEL_1 0x01
+#define V_008F0C_SQ_SEL_RESERVED_0 0x02
+#define V_008F0C_SQ_SEL_RESERVED_1 0x03
+#define V_008F0C_SQ_SEL_X 0x04
+#define V_008F0C_SQ_SEL_Y 0x05
+#define V_008F0C_SQ_SEL_Z 0x06
+#define V_008F0C_SQ_SEL_W 0x07
+#define S_008F0C_NUM_FORMAT(x) (((x) & 0x07) << 12)
+#define G_008F0C_NUM_FORMAT(x) (((x) >> 12) & 0x07)
+#define C_008F0C_NUM_FORMAT 0xFFFF8FFF
+#define V_008F0C_BUF_NUM_FORMAT_UNORM 0x00
+#define V_008F0C_BUF_NUM_FORMAT_SNORM 0x01
+#define V_008F0C_BUF_NUM_FORMAT_USCALED 0x02
+#define V_008F0C_BUF_NUM_FORMAT_SSCALED 0x03
+#define V_008F0C_BUF_NUM_FORMAT_UINT 0x04
+#define V_008F0C_BUF_NUM_FORMAT_SINT 0x05
+#define V_008F0C_BUF_NUM_FORMAT_SNORM_OGL 0x06
+#define V_008F0C_BUF_NUM_FORMAT_FLOAT 0x07
+#define S_008F0C_DATA_FORMAT(x) (((x) & 0x0F) << 15)
+#define G_008F0C_DATA_FORMAT(x) (((x) >> 15) & 0x0F)
+#define C_008F0C_DATA_FORMAT 0xFFF87FFF
+#define V_008F0C_BUF_DATA_FORMAT_INVALID 0x00
+#define V_008F0C_BUF_DATA_FORMAT_8 0x01
+#define V_008F0C_BUF_DATA_FORMAT_16 0x02
+#define V_008F0C_BUF_DATA_FORMAT_8_8 0x03
+#define V_008F0C_BUF_DATA_FORMAT_32 0x04
+#define V_008F0C_BUF_DATA_FORMAT_16_16 0x05
+#define V_008F0C_BUF_DATA_FORMAT_10_11_11 0x06
+#define V_008F0C_BUF_DATA_FORMAT_11_11_10 0x07
+#define V_008F0C_BUF_DATA_FORMAT_10_10_10_2 0x08
+#define V_008F0C_BUF_DATA_FORMAT_2_10_10_10 0x09
+#define V_008F0C_BUF_DATA_FORMAT_8_8_8_8 0x0A
+#define V_008F0C_BUF_DATA_FORMAT_32_32 0x0B
+#define V_008F0C_BUF_DATA_FORMAT_16_16_16_16 0x0C
+#define V_008F0C_BUF_DATA_FORMAT_32_32_32 0x0D
+#define V_008F0C_BUF_DATA_FORMAT_32_32_32_32 0x0E
+#define V_008F0C_BUF_DATA_FORMAT_RESERVED_15 0x0F
+#define S_008F0C_ELEMENT_SIZE(x) (((x) & 0x03) << 19)
+#define G_008F0C_ELEMENT_SIZE(x) (((x) >> 19) & 0x03)
+#define C_008F0C_ELEMENT_SIZE 0xFFE7FFFF
+#define S_008F0C_INDEX_STRIDE(x) (((x) & 0x03) << 21)
+#define G_008F0C_INDEX_STRIDE(x) (((x) >> 21) & 0x03)
+#define C_008F0C_INDEX_STRIDE 0xFF9FFFFF
+#define S_008F0C_ADD_TID_ENABLE(x) (((x) & 0x1) << 23)
+#define G_008F0C_ADD_TID_ENABLE(x) (((x) >> 23) & 0x1)
+#define C_008F0C_ADD_TID_ENABLE 0xFF7FFFFF
+#define S_008F0C_HASH_ENABLE(x) (((x) & 0x1) << 25)
+#define G_008F0C_HASH_ENABLE(x) (((x) >> 25) & 0x1)
+#define C_008F0C_HASH_ENABLE 0xFDFFFFFF
+#define S_008F0C_HEAP(x) (((x) & 0x1) << 26)
+#define G_008F0C_HEAP(x) (((x) >> 26) & 0x1)
+#define C_008F0C_HEAP 0xFBFFFFFF
+#define S_008F0C_TYPE(x) (((x) & 0x03) << 30)
+#define G_008F0C_TYPE(x) (((x) >> 30) & 0x03)
+#define C_008F0C_TYPE 0x3FFFFFFF
+#define V_008F0C_SQ_RSRC_BUF 0x00
+#define V_008F0C_SQ_RSRC_BUF_RSVD_1 0x01
+#define V_008F0C_SQ_RSRC_BUF_RSVD_2 0x02
+#define V_008F0C_SQ_RSRC_BUF_RSVD_3 0x03
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 06e8b725da0..b59abc0b439 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -15,10 +15,11 @@
#include "SIISelLowering.h"
#include "AMDIL.h"
#include "AMDILIntrinsicInfo.h"
+#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/Argument.h"
+#include "llvm/IR/Argument.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -313,27 +314,42 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// The T# is composed of a 48-bit virtual address plus 80-bits of information
// about the buffer. These 80-bits of information encode things like buffer
// stride, buffer size, buffer swizzle as well as some configuration bits.
- // For simple loads and stores to memory, the only relevant part of the T# is
- // the virtual address. The rest of the bits can be set to zero.
- VirtualAddress = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, VirtualAddress);
+ // Set NumRecords to the maximum allowed value. This way we don't need to
+ // pass the number of records to the program and it should work for all
+ // buffer sizes.
+ //
+ // XXX: Is there any disadvantage to doing this?
+ //
+ uint64_t RsrcWord2 = 0xFFFFFFFF; //NUM_RECORDS
+
+ unsigned DataFormat;
+ switch (VT.getSizeInBits()) {
+ case 32: DataFormat = V_008F0C_BUF_DATA_FORMAT_32; break;
+ case 64: DataFormat = V_008F0C_BUF_DATA_FORMAT_32_32; break;
+ default: llvm_unreachable("Unhandle type size in store");
+ }
- SDValue Reg0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- VirtualAddress, DAG.getConstant(0, MVT::i32));
- SDValue Reg1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
- VirtualAddress, DAG.getConstant(1, MVT::i32));
- SDValue Reg2 = DAG.getRegister(AMDGPU::SREG_LIT_0, MVT::i32);
- SDValue Reg3 = DAG.getRegister(AMDGPU::SREG_LIT_0, MVT::i32);
+ uint64_t RsrcWord3 =
+ // Always set the destination select values to XYZW.
+ // XXX: Is there any disadvantage to doing this?
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ // NUM_FORMAT_UINT appears to work even if the storage type is float,
+ // so we'll use it for everything.
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(DataFormat);
- SDValue TNum = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Reg0, Reg1,
- Reg2, Reg3);
- TNum = DAG.getNode(ISD::BITCAST, DL, MVT::i128, TNum);
+ SDValue RsrcWord23 = DAG.getConstant(RsrcWord2 | (RsrcWord3 << 32), MVT::i64);
- SDValue Vaddr = DAG.getConstant(0, MVT::i64);
+ SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
+ VirtualAddress, RsrcWord23);
SDValue Ops[2];
- Ops[0] = DAG.getNode(SIISD::BUFFER_STORE, DL, MVT::Other, Chain,
- Value, TNum, Vaddr);
+ Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
+ Value, SrcSrc, DAG.getConstant(0, MVT::i32));
Ops[1] = Chain;
return DAG.getMergeValues(Ops, 2, DL);
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 916435cbc21..6c5f93292cf 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -287,7 +287,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
// These 4 fields are matched based on order in some instructions. Do not
// reorder them.
bits<8> vdata;
- bits<7> srsrc;
+ bits<8> srsrc;
bits<12> offset;
bits<8> vaddr;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ddc9964f129..fd9d989a758 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -26,7 +26,7 @@ def HI32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
}]>;
-def SIbuffer_store : SDNode<"SIISD::BUFFER_STORE",
+def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE",
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayStore]>;
@@ -57,7 +57,7 @@ def SIOperand {
int ZERO = 0x80;
}
-class GPR4Align <RegisterClass rc> : Operand <vAny> {
+class GPR4Align <RegisterClass rc, ValueType VT = vAny> : Operand <VT> {
let EncoderMethod = "GPR4AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
@@ -70,14 +70,14 @@ class GPR2Align <RegisterClass rc> : Operand <iPTR> {
def BufPtr : Operand<iPTR> {
// Changing the operand order for this instruction will break the code emitter,
// so do not reorder them.
- let MIOperandInfo = (ops SReg_128:$srsrc, i16imm:$offset);
+ let MIOperandInfo = (ops SReg_64:$srsrc, i16imm:$offset);
}
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
-def ADDR_MBUF : ComplexPattern<i128, 2, "SelectADDRMBUF", [], []>;
+def ADDR_MBUF : ComplexPattern<i64, 2, "SelectADDRMBUF", [], []>;
include "SIInstrFormats.td"
@@ -297,19 +297,20 @@ class MUBUF_LoadFormat <bits<7> op, string name, RegisterClass regClass,
// Changing the operand order for this instruction will break the code emitter,
// so do not reorder them.
class MUBUF_StoreBuffer <bits<7> op, string name, RegisterClass vdataClass> :
- MUBUF <op, (outs), (ins vdataClass:$VDATA, BufPtr:$PTR, VReg_64:$VADDR),
- name#" $VDATA, $PTR.srsrc + $VADDR + $PTR.offset",
- [(SIbuffer_store (i32 vdataClass:$VDATA), ADDR_MBUF:$PTR,
- (i64 VReg_64:$VADDR))]> {
+ MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr),
+ name#" $vdata, $vaddr + $srsrc",
+ [(SIbuffer_store (i32 vdataClass:$vdata), (i128 SReg_128:$srsrc),
+ (i32 VReg_32:$vaddr))]> {
let mayLoad = 0;
let mayStore = 1;
// Encoding
- let offen = 0;
- let idxen = 1;
+ let offset = 0;
+ let offen = 1;
+ let idxen = 0;
let glc = 0;
- let addr64 = 1;
+ let addr64 = 0;
let lds = 0;
let slc = 0;
let tfe = 0;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 0f1c7f52923..bd47bacf1bc 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -942,7 +942,7 @@ def : Pat <
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
- [(set (i64 SReg_64:$dst), (and SReg_64:$src0, SReg_64:$src1))]
+ [(set (i64 SReg_64:$dst), (and SSrc_64:$src0, SSrc_64:$src1))]
>;
def : Pat <