summary | refs | log | tree | commit | diff
path: root/lib/Target/AMDGPU/R600Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/R600Instructions.td')
-rw-r--r-- lib/Target/AMDGPU/R600Instructions.td 311
1 files changed, 230 insertions, 81 deletions
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 4ce256d1298..eba9e72a6c4 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -22,6 +22,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
bit Op3 = 0;
bit isVector = 0;
bits<2> FlagOperandIdx = 0;
+ bit HasNativeOperands = 0;
bits<11> op_code = inst;
//let Inst = inst;
@@ -39,6 +40,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
// instruction group
let TSFlags{6} = isVector;
let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -59,21 +61,163 @@ def MEMrr : Operand<iPTR> {
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+// Pointer-typed (iPTR) operand made of two sub-operands: an R600_Reg32
+// register ($ptr) and an i32 immediate ($index). It is the $addr operand of
+// the FrameLoad_i32 / FrameStore_i32 pseudo-instructions defined in this file.
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
+// Destination operand of value type `vt`. It expands to five machine
+// sub-operands: the R600_Reg32 register ($gpr) followed by four InstFlag
+// operands ($write_mask, $omod, $rel, $clamp). InstFlag is an i32 operand
+// with a default value of 0.
+class ALUDstBase <ValueType vt> : Operand <vt> {
+ let MIOperandInfo = (ops R600_Reg32:$gpr, InstFlag:$write_mask,
+ InstFlag:$omod, InstFlag:$rel, InstFlag:$clamp);
+}
+
+def ALUDst_I32 : ALUDstBase <i32>;
+def ALUDst_F32 : ALUDstBase <f32>;
+
+// Source operand of value type `vt`. It expands to four machine sub-operands:
+// the R600_Reg32 register ($gpr) followed by three InstFlag operands
+// ($neg, $rel, $abs).
+// NOTE(review): the `immType` template argument is not referenced anywhere in
+// this class body -- confirm whether it is reserved for later use.
+class ALUSrcOp2Base <ValueType vt, Operand immType> : Operand <vt> {
+ let MIOperandInfo = (ops R600_Reg32:$gpr, InstFlag:$neg, InstFlag:$rel,
+ InstFlag:$abs);
+}
+def ALUSrcOp2_I32 : ALUSrcOp2Base <i32, i32imm>;
+def ALUSrcOp2_F32 : ALUSrcOp2Base <f32, f32imm>;
+
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRFrame : ComplexPattern<i32, 2, "SelectADDRFrame", [], []>;
+// ComplexPattern for a destination of type `vt`, selected by the C++ routine
+// "SelectALUDst". It produces 5 operands, the same count as the sub-operand
+// list of ALUDstBase.
+class ALUDstPat<ValueType vt> : ComplexPattern<vt, 5, "SelectALUDst", [], []>;
+// ComplexPattern for a source of type `vt`, selected by the C++ routine
+// "SelectALUSrcOp2". It produces 4 operands, the same count as the
+// sub-operand list of ALUSrcOp2Base.
+class ALUSrcOp2Pat<ValueType vt> : ComplexPattern<vt, 4,
+ "SelectALUSrcOp2", [], []>;
-class R600_ALU {
- bits<7> DST_GPR = 0;
- bits<9> SRC0_SEL = 0;
- bits<1> SRC0_NEG = 0;
- bits<9> SRC1_SEL = 0;
- bits<1> SRC1_NEG = 0;
- bits<1> CLAMP = 0;
-
+// Encoding of bits 31-0 of the 64-bit `Inst` field: the two source operands
+// plus index_mode, pred_sel and last.
+// Each 11-bit source value is split around its `rel` bit: bits 8-0 come
+// first, then the `rel` bit, then bits 10-9, then the `neg` bit.
+class R600ALU_Word0 {
+ field bits<64> Inst;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ // Source 0 occupies Inst{12-0}.
+ let Inst{8-0} = src0{8-0};
+ let Inst{9} = src0_rel;
+ let Inst{11-10} = src0{10-9};
+ let Inst{12} = src0_neg;
+ // Source 1 occupies Inst{25-13}, laid out the same way as source 0.
+ let Inst{21-13} = src1{8-0};
+ let Inst{22} = src1_rel;
+ let Inst{24-23} = src1{10-9};
+ let Inst{25} = src1_neg;
+ // Remaining per-instruction control fields fill Inst{31-26}.
+ let Inst{28-26} = index_mode;
+ let Inst{30-29} = pred_sel;
+ let Inst{31} = last;
+}
+
+// Encoding of bits 63-32 of the 64-bit `Inst` field for a two-operand ALU
+// instruction. The `alu_inst` template argument is the 11-bit opcode and is
+// written to Inst{49-39}.
+// NOTE(review): `dst` is 9 bits wide here and its top two bits (dst{8-7}) go
+// to Inst{62-61}, whereas R600ALU_OP2 in this file declares an 11-bit `dst`
+// and writes dst{10-9} to the same Inst bits -- confirm which is intended.
+class R600ALU_Word1_OP2 <bits<11> alu_inst> {
+ field bits<64> Inst;
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write_mask;
+ bits<2> omod;
+ bits<3> bank_swizzle;
+ bits<9> dst;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ // Source modifiers and update/write flags: Inst{38-32}.
+ let Inst{32} = src0_abs;
+ let Inst{33} = src1_abs;
+ let Inst{34} = update_exec_mask;
+ let Inst{35} = update_pred;
+ let Inst{36} = write_mask;
+ let Inst{38-37} = omod;
+ // Opcode and bank swizzle: Inst{52-39}.
+ let Inst{49-39} = alu_inst;
+ let Inst{52-50} = bank_swizzle;
+ // Destination: low 7 bits, then the `rel` bit, then the top 2 bits.
+ let Inst{59-53} = dst{6-0};
+ let Inst{60} = dst_rel;
+ let Inst{62-61} = dst{8-7};
+ let Inst{63} = clamp;
}
+// Complete 64-bit encoding of a two-operand ALU instruction, with `inst` as
+// the 11-bit opcode written to Inst{49-39}. The layout of Inst{31-0} repeats
+// R600ALU_Word0 and the layout of Inst{63-32} follows R600ALU_Word1_OP2; the
+// inheritance from those two classes is left commented out at the end of the
+// body.
+// Fields declared without an initializer (dst, src0*, src1*, pred_sel) have
+// no value assigned in this class; the remaining fields default to 0 here.
+class R600ALU_OP2 <bits<11> inst> {
+ field bits<64> Inst;
+
+ bits<11> dst;
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<1> src0_abs;
+ bits<11> src1;
+ bits<1> src1_neg;
+ bits<1> src1_rel;
+ bits<1> src1_abs;
+ bits<2> pred_sel;
+
+ // For now, the following fields are manually encoded in R600MCCodeEmitter:
+ bits<1> update_exec_mask = 0;
+ bits<1> update_pred = 0;
+ bits<1> write_mask = 0;
+ bits<2> omod = 0;
+ bits<3> bank_swizzle = 0;
+ bits<1> dst_rel = 0;
+ bits<1> clamp = 0;
+ bits<3> index_mode = 0;
+ bits<1> last = 0;
+
+ // Inst{31-0}: each 11-bit source is split around its `rel` bit
+ // (bits 8-0, rel, bits 10-9, neg), followed by index_mode, pred_sel, last.
+ let Inst{8-0} = src0{8-0};
+ let Inst{9} = src0_rel;
+ let Inst{11-10} = src0{10-9};
+ let Inst{12} = src0_neg;
+ let Inst{21-13} = src1{8-0};
+ let Inst{22} = src1_rel;
+ let Inst{24-23} = src1{10-9};
+ let Inst{25} = src1_neg;
+ let Inst{28-26} = index_mode;
+ let Inst{30-29} = pred_sel;
+ let Inst{31} = last;
+
+ // Inst{63-32}: abs modifiers, update/write flags, omod, opcode,
+ // bank swizzle, destination and clamp.
+ let Inst{32} = src0_abs;
+ let Inst{33} = src1_abs;
+ let Inst{34} = update_exec_mask;
+ let Inst{35} = update_pred;
+ let Inst{36} = write_mask;
+ let Inst{38-37} = omod;
+ let Inst{49-39} = inst;
+ let Inst{52-50} = bank_swizzle;
+ // Only dst{6-0} and dst{10-9} are encoded; dst{8-7} is not written to Inst.
+ let Inst{59-53} = dst{6-0};
+ let Inst{60} = dst_rel;
+ let Inst{62-61} = dst{10-9};
+ let Inst{63} = clamp;
+// R600ALU_Word0,
+// R600ALU_Word1_OP2 <inst>;
+}
+
+/*
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+
+*/
+
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
(ops PRED_SEL_OFF)>;
@@ -82,18 +226,33 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
- !strconcat(opName, " $dst, $src $p"),
- pattern,
- itin>{
- bits<7> dst;
- bits<9> src;
- let Inst{8-0} = src;
- let Inst{49-39} = inst;
- let Inst{59-53} = dst;
- }
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, InstFlag:$src0_neg, InstFlag:$src0_rel,
+ InstFlag:$src0_abs, R600_Pred:$pred_sel, InstFlag:$literal),
+ !strconcat(opName, " $dst, $src0 (neg$src0_neg rel$src0_rel "
+ "abs$src0_abs) $pred_sel, $literal"),
+ pattern,
+ itin>,
+ R600ALU_OP2<inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+}
+
+// Single-source ALU instruction whose selection pattern applies `node` to an
+// f32 source matched through ALUSrcOp2Pat<f32> and writes R600_Reg32:$dst.
+//   inst   - 11-bit opcode, forwarded to R600_1OP.
+//   opName - mnemonic, forwarded to R600_1OP.
+//   node   - SelectionDAG operator to match.
+//   itin   - instruction itinerary (default AnyALU). It is forwarded to
+//            R600_1OP so that a caller-supplied itinerary is not dropped.
+class R600_1OP_F32 <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node ALUSrcOp2Pat<f32>:$src0))],
+ itin
+>;
+
+// Single-source ALU instruction whose selection pattern applies `node` to an
+// i32 source matched through ALUSrcOp2Pat<i32> and writes R600_Reg32:$dst.
+//   inst   - 11-bit opcode, forwarded to R600_1OP.
+//   opName - mnemonic, forwarded to R600_1OP.
+//   node   - SelectionDAG operator to match.
+//   itin   - instruction itinerary (default AnyALU). It is forwarded to
+//            R600_1OP so that a caller-supplied itinerary is not dropped.
+class R600_1OP_I32 <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node ALUSrcOp2Pat<i32>:$src0))],
+ itin
+>;
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
@@ -383,30 +542,15 @@ def SNE : R600_2OP <
COND_NE))]
>;
-def FRACT : R600_1OP <
- 0x10, "FRACT",
- [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
->;
+def FRACT : R600_1OP_F32 <0x10, "FRACT", AMDGPUfract>;
-def TRUNC : R600_1OP <
- 0x11, "TRUNC",
- [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
->;
+def TRUNC : R600_1OP_F32 <0x11, "TRUNC", int_AMDGPU_trunc>;
-def CEIL : R600_1OP <
- 0x12, "CEIL",
- [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
->;
+def CEIL : R600_1OP_F32 <0x12, "CEIL", fceil>;
-def RNDNE : R600_1OP <
- 0x13, "RNDNE",
- [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
->;
+def RNDNE : R600_1OP_F32 <0x13, "RNDNE", frint>;
-def FLOOR : R600_1OP <
- 0x14, "FLOOR",
- [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))]
->;
+def FLOOR : R600_1OP_F32 <0x14, "FLOOR", ffloor>;
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -482,10 +626,7 @@ def XOR_INT : R600_2OP <
[(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
>;
-def NOT_INT : R600_1OP <
- 0x33, "NOT_INT",
- [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
->;
+def NOT_INT : R600_1OP_I32 <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP <
0x34, "ADD_INT",
@@ -739,39 +880,32 @@ multiclass CUBE_Common <bits<11> inst> {
}
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-class EXP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "EXP_IEEE",
- [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "EXP_IEEE", fexp2
>;
-class FLT_TO_INT_Common <bits<11> inst> : R600_1OP <
- inst, "FLT_TO_INT",
- [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "FLT_TO_INT", fp_to_sint
>;
-class INT_TO_FLT_Common <bits<11> inst> : R600_1OP <
- inst, "INT_TO_FLT",
- [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_I32 <
+ inst, "INT_TO_FLT", sint_to_fp
>;
-class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP <
- inst, "FLT_TO_UINT",
- [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "FLT_TO_UINT", fp_to_uint
>;
-class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP <
- inst, "UINT_TO_FLT",
- [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_I32 <
+ inst, "UINT_TO_FLT", uint_to_fp
>;
class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "LOG_CLAMPED",
- []
+ inst, "LOG_CLAMPED", []
>;
-class LOG_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "LOG_IEEE",
- [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))]
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "LOG_IEEE", flog2
>;
class LSHL_Common <bits<11> inst> : R600_2OP <
@@ -810,28 +944,23 @@ class MULLO_UINT_Common <bits<11> inst> : R600_2OP <
>;
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_CLAMPED",
- []
+ inst, "RECIP_CLAMPED", []
>;
-class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE",
- [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "RECIP_IEEE", int_AMDGPU_rcp
>;
-class RECIP_UINT_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_INT $dst, $src",
- [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_I32 <
+ inst, "RECIP_UINT", AMDGPUurecip
>;
-class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_CLAMPED",
- [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_F32 <
+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
>;
class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_IEEE",
- []
+ inst, "RECIPSQRT_IEEE", []
>;
class SIN_Common <bits<11> inst> : R600_1OP <
@@ -1037,11 +1166,11 @@ let Predicates = [isEGorCayman] in {
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
// We should look into handling these cases separately.
- def : Pat<(fp_to_sint R600_Reg32:$src),
- (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>;
+ def : Pat<(fp_to_sint ALUSrcOp2Pat<f32>:$src0),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
- def : Pat<(fp_to_uint R600_Reg32:$src),
- (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>;
+ def : Pat<(fp_to_uint ALUSrcOp2Pat<f32>:$src0),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
def : Pat<(fsqrt R600_Reg32:$src),
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
@@ -1286,6 +1415,26 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
}
+//===----------------------------------------------------------------------===//
+// Frame loads and stores
+//===----------------------------------------------------------------------===//
+
+// Both definitions below are flagged usesCustomInserter, isPseudo and
+// isCodeGenOnly. They use opcode 0x0 and the NullALU itinerary.
+let usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly =1 in {
+
+// Matches an i32 `load` whose address is selected by ADDRFrame; the address
+// is carried as a FRAMEri operand and the result goes to R600_Reg32:$dst.
+def FrameLoad_i32 : InstR600 <0x0,
+ (outs R600_Reg32:$dst), (ins FRAMEri:$addr),
+ "FrameLoad_i32 $dst, $addr",
+ [(set (i32 R600_Reg32:$dst), (load ADDRFrame:$addr))], NullALU
+>;
+
+// Matches a `store` of an i32 R600_Reg32 value to an address selected by
+// ADDRFrame; the instruction has no outputs.
+def FrameStore_i32 : InstR600 <0x0,
+ (outs), (ins R600_Reg32:$val, FRAMEri:$addr),
+ "FrameStore_i32 $val, $addr",
+ [(store (i32 R600_Reg32:$val), ADDRFrame:$addr)], NullALU
+>;
+
+} // End usesCustomInserter = 1, isPseudo = 1, isCodeGenOnly = 1
+
let Predicates = [isCayman] in {
let isVector = 1 in {