summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2012-09-11 15:24:32 -0400
committer: Tom Stellard <thomas.stellard@amd.com> 2012-09-14 16:00:08 +0000
commit: 9e6fa84e89655092154c5fb60496039b49bf0497 (patch)
tree: 7e0d09b397d1f44c0cc9abdcb814cebea17bf3c5
parent: 818196e07a7fa9f5962151c7bce4579d7a02a055 (diff)
R600: Fix lowering of vbuild (branch: r600-review-v10)
Some of the old AMDIL code was hard-coding subreg indices when creating the VBUILD node, which was making it difficult to match the vector_insert patterns.
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 2
-rw-r--r-- lib/Target/AMDGPU/AMDGPUInstructions.td | 10
-rw-r--r-- lib/Target/AMDGPU/AMDILISelLowering.cpp | 68
-rw-r--r-- lib/Target/AMDGPU/AMDILInstrInfo.td | 6
-rw-r--r-- lib/Target/AMDGPU/R600Instructions.td | 20
-rw-r--r-- lib/Target/AMDGPU/SIInstructions.td | 4
7 files changed, 19 insertions, 93 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 0a70164fcbf..59daf77503e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -88,7 +88,6 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
// AMDIL DAG lowering
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::SREM: return LowerSREM(Op, DAG);
- case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
@@ -336,7 +335,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
- NODE_NAME_CASE(VBUILD);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 4c100da9690..a6d2a50d11b 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -90,7 +90,6 @@ private:
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -105,7 +104,6 @@ enum
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
MAD, // 32bit Fused Multiply Add instruction
- VBUILD, // scalar to vector mov instruction
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6f47445b07c..3e850ebe184 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -154,9 +154,13 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
>;
// Vector Build pattern
-class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
- (IL_vbuild elemClass:$src),
- (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
+class Vector_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
+ (elemType elemClass:$z), (elemType elemClass:$w))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
+ elemClass:$z, sel_z), elemClass:$w, sel_w)
>;
// bitconvert pattern
diff --git a/lib/Target/AMDGPU/AMDILISelLowering.cpp b/lib/Target/AMDGPU/AMDILISelLowering.cpp
index 680f0fc88a0..ce7a5e857b3 100644
--- a/lib/Target/AMDGPU/AMDILISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDILISelLowering.cpp
@@ -162,7 +162,6 @@ void AMDGPUTargetLowering::InitAMDILLowering()
{
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -217,7 +216,6 @@ void AMDGPUTargetLowering::InitAMDILLowering()
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
// Use the default implementation.
setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
@@ -352,72 +350,6 @@ AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
}
SDValue
-AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
-{
- EVT VT = Op.getValueType();
- SDValue Nodes1;
- SDValue second;
- SDValue third;
- SDValue fourth;
- DebugLoc DL = Op.getDebugLoc();
- Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
- DL,
- VT, Op.getOperand(0));
-#if 0
- bool allEqual = true;
- for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
- if (Op.getOperand(0) != Op.getOperand(x)) {
- allEqual = false;
- break;
- }
- }
- if (allEqual) {
- return Nodes1;
- }
-#endif
- switch(Op.getNumOperands()) {
- default:
- case 1:
- break;
- case 4:
- fourth = Op.getOperand(3);
- if (fourth.getOpcode() != ISD::UNDEF) {
- Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- fourth,
- DAG.getConstant(7, MVT::i32));
- }
- case 3:
- third = Op.getOperand(2);
- if (third.getOpcode() != ISD::UNDEF) {
- Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- third,
- DAG.getConstant(6, MVT::i32));
- }
- case 2:
- second = Op.getOperand(1);
- if (second.getOpcode() != ISD::UNDEF) {
- Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- second,
- DAG.getConstant(5, MVT::i32));
- }
- break;
- };
- return Nodes1;
-}
-
-SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
SDValue Data = Op.getOperand(0);
diff --git a/lib/Target/AMDGPU/AMDILInstrInfo.td b/lib/Target/AMDGPU/AMDILInstrInfo.td
index e52507b8c35..a096b6cacdb 100644
--- a/lib/Target/AMDGPU/AMDILInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDILInstrInfo.td
@@ -124,12 +124,6 @@ def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
-//===----------------------------------------------------------------------===//
-// Vector functions
-//===----------------------------------------------------------------------===//
-def IL_vbuild : SDNode<"AMDGPUISD::VBUILD", SDTIL_GenVecBuild,
- []>;
-
//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 75eb3ec01a0..9ec89960854 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1247,23 +1247,23 @@ def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
-def : Vector_Build <v4f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg32>;
+def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 1e166a23ece..ede52f47572 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1052,8 +1052,8 @@ def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
-def : Vector_Build <v4f32, VReg_32>;
-def : Vector_Build <v4i32, SReg_32>;
+def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
+def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;