summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-07-19 11:50:00 -0700
committerTom Stellard <thomas.stellard@amd.com>2014-05-30 14:18:17 -0400
commit401e16b2908e8673951d4ac56777833cdbb50b4a (patch)
treef1c8b740ed9060304dbb89d0f93f31d1613cb83c
parentfefc8fe2c8a9a02c76141c9bc8278560d9389311 (diff)
R600/SI: Define a schedule model and enable the generic machine scheduler (si-scheduler-v2)
The schedule model is not complete yet, and could be improved.
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp9
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h6
-rw-r--r--lib/Target/R600/Processors.td22
-rw-r--r--lib/Target/R600/SIInstrFormats.td18
-rw-r--r--lib/Target/R600/SIInstructions.td97
-rw-r--r--lib/Target/R600/SISchedule.td74
6 files changed, 208 insertions, 18 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index f3b993204a5..4811fdc5ec8 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
@@ -110,3 +111,11 @@ std::string
AMDGPUSubtarget::getDeviceName() const {
return DevName;
}
+
+void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // Policy is modified in place.
+ MachineInstr *begin, // Not read by this override.
+ MachineInstr *end, // Not read by this override.
+ unsigned NumRegionInstrs) const { // Not read by this override.
+ if (getGeneration() >= SOUTHERN_ISLANDS) // Generations below SOUTHERN_ISLANDS keep the incoming policy untouched.
+ Policy.OnlyTopDown = true; // Set the OnlyTopDown flag of the scheduling policy.
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b041d6bd2b..34c9550a260 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -92,9 +92,13 @@ public:
bool hasCFAluBug() const;
bool enableMachineScheduler() const override {
- return getGeneration() <= NORTHERN_ISLANDS;
+ return true;
}
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+
// Helper functions to simplify if statements
bool isTargetELF() const;
std::string getDeviceName() const;
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index ce17d7cb7f1..e4ca37d84b5 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -83,28 +83,28 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"SI", SIModel, [FeatureSouthernIslands]>;
-def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"tahiti", SIModel, [FeatureSouthernIslands]>;
-def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn", SIModel, [FeatureSouthernIslands]>;
-def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde", SIModel, [FeatureSouthernIslands]>;
-def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland", SIModel, [FeatureSouthernIslands]>;
-def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
+def : ProcessorModel<"hainan", SIModel, [FeatureSouthernIslands]>;
//===----------------------------------------------------------------------===//
// Sea Islands
//===----------------------------------------------------------------------===//
-def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"bonaire", SIModel, [FeatureSeaIslands]>;
-def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"kabini", SIModel, [FeatureSeaIslands]>;
-def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"kaveri", SIModel, [FeatureSeaIslands]>;
-def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"hawaii", SIModel, [FeatureSeaIslands]>;
-def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>;
+def : ProcessorModel<"mullins", SIModel, [FeatureSeaIslands]>;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 168eff25bb2..388567e966c 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -35,6 +35,8 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{7} = VOP3;
let TSFlags{8} = VOPC;
let TSFlags{9} = SALU;
+
+ let SchedRW = [WriteInt];
}
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -55,6 +57,8 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
// Scalar operations
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSALU] in {
+
class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
@@ -144,6 +148,10 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
let SALU = 1;
}
+} // let SchedRW = [WriteSALU]
+
+let SchedRW = [WriteSMEM] in {
+
class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
@@ -162,6 +170,8 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
let SMRD = 1;
}
+} // SchedRW = [WriteSMEM]
+
//===----------------------------------------------------------------------===//
// Vector ALU operations
//===----------------------------------------------------------------------===//
@@ -343,6 +353,8 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let LGKM_CNT = 1;
}
+let SchedRW = [WriteVMEM] in {
+
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
@@ -456,6 +468,10 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let MIMG = 1;
}
+} // let SchedRW = [WriteVMEM]
+
+let SchedRW = [WriteExport] in {
+
def EXP : Enc64<
(outs),
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
@@ -487,4 +503,6 @@ def EXP : Enc64<
let EXP_CNT = 1;
}
+} // let SchedRW = [WriteExport]
+
} // End Uses = [EXEC]
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index ed4ad215028..826136ce70c 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -928,12 +928,14 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
-let neverHasSideEffects = 1, isMoveImm = 1 in {
+let neverHasSideEffects = 1, isMoveImm = 1, SchedRW = [WriteFloat] in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-} // End neverHasSideEffects = 1, isMoveImm = 1
+} // End neverHasSideEffects = 1, isMoveImm = 1, SchedRW = [WriteFloat]
let Uses = [EXEC] in {
+// FIXME: Specify SchedRW for V_READFIRSTLANE_B32
+
def V_READFIRSTLANE_B32 : VOP1 <
0x00000002,
(outs SReg_32:$vdst),
@@ -944,6 +946,8 @@ def V_READFIRSTLANE_B32 : VOP1 <
}
+let SchedRW = [WriteConversion] in {
+
defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
[(set i32:$dst, (fp_to_sint f64:$src0))]
>;
@@ -962,7 +966,14 @@ defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
+
+} // End SchedRW = [WriteConversion]
+
+// FIXME: Specify SchedRW for V_MOV_FED_B32
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+
+let SchedRW = [WriteConversion] in {
+
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
@@ -985,6 +996,10 @@ defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32",
[(set f64:$dst, (uint_to_fp i32:$src0))]
>;
+} // SchedRW = [WriteConversion]
+
+let SchedRW = [WriteFloat] in {
+
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
@@ -1000,6 +1015,11 @@ defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
[(set f32:$dst, (ffloor f32:$src0))]
>;
+
+} // SchedRW = [WriteFloat]
+
+let SchedRW = [WriteFloatTrans] in {
+
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
[(set f32:$dst, (fexp2 f32:$src0))]
>;
@@ -1033,6 +1053,10 @@ defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
>;
defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+
+} // SchedRW = [WriteFloatTrans]
+
+// FIXME: Specify SchedRW for the rest of the VOP1 instructions.
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
@@ -1053,6 +1077,8 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
// VINTRP Instructions
//===----------------------------------------------------------------------===//
+// FIXME: Specify SchedRW for VINTRP instructions.
+
def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
@@ -1087,6 +1113,8 @@ def V_INTERP_MOV_F32 : VINTRP <
// VOP2 Instructions
//===----------------------------------------------------------------------===//
+// FIXME: Specify SchedRW for V_CNDMASK and V_*LANE_B32
+
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
"V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
@@ -1122,6 +1150,7 @@ def V_WRITELANE_B32 : VOP2 <
[]
>;
+let SchedRW = [WriteFloat] in {
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
[(set f32:$dst, (fadd f32:$src0, f32:$src1))]
@@ -1146,6 +1175,9 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
[(set f32:$dst, (fmul f32:$src0, f32:$src1))]
>;
+} // SchedRW = [WriteFloat]
+
+let SchedRW = [WriteI24] in {
defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
[(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))]
@@ -1156,6 +1188,9 @@ defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+} // SchedRW = [WriteI24]
+
+//let SchedRW = [WriteFloat] in {
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
[(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
@@ -1167,6 +1202,11 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+
+//} // SchedRW = [WriteFloat]
+
+let SchedRW = [WriteInt] in {
+
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
@@ -1205,8 +1245,11 @@ defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
+} // let SchedRW = [WriteInt]
+
} // End isCommutable = 1
+// FIXME: Specify SchedRW for these instructions:
defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
@@ -1216,6 +1259,8 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+let SchedRW = [WriteInt] in {
+
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -1235,7 +1280,9 @@ defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
"V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
+} // SchedRW = [WriteInt]
+// FIXME: Specify SchedRW for the rest of the VOP2 instructions.
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
@@ -1252,10 +1299,17 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
let neverHasSideEffects = 1 in {
+let SchedRW = [WriteFloat] in {
+
defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;
+
+} // SchedRW = [WriteFloat]
+
+let SchedRW = [WriteI24] in {
+
defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
[(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
>;
@@ -1263,33 +1317,53 @@ defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
[(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
>;
+} // SchedRW = [WriteI24]
+
} // End neverHasSideEffects
+// FIXME: Specify SchedRW for V_CUBE*
defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+
+let SchedRW = [WriteInt] in {
+
defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
[(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
[(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
-}
-
defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
+
+} // SchedRW = [WriteInt]
+} // neverHasSideEffects = 1, mayLoad = 0, mayStore = 0
+
+let SchedRW = [WriteFloatFMA] in {
defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
>;
+} // SchedRW = [WriteFloatFMA]
+
+// FIXME: Specify SchedRW for V_FMA_F64
def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
[(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+
+let SchedRW = [WriteInt] in {
defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+} // SchedRW = [WriteInt]
+
+let SchedRW = [WriteFloat] in {
+
defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+
+} //SchedRW = [WriteFloat]
////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
@@ -1307,6 +1381,8 @@ defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+let SchedRW = [WriteInt] in {
+
def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
@@ -1317,26 +1393,39 @@ def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
+} // SchedRW = [WriteInt]
+
let isCommutable = 1 in {
+let SchedRW = [WriteDoubleAdd] in {
def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+} // SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteDouble] in {
def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+} //SchedRW = [WriteDouble]
} // isCommutable = 1
+// FIXME: Specify SchedRW for V_LDEXP_F64
def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
let isCommutable = 1 in {
+let SchedRW = [WriteIntMUL] in {
+
defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+} // SchedRW = [WriteIntMUL]
+
} // isCommutable = 1
+// FIXME: Specify SchedRW for the rest of VOP3 instructions.
defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
index 28b65b82585..174974d3066 100644
--- a/lib/Target/R600/SISchedule.td
+++ b/lib/Target/R600/SISchedule.td
@@ -7,9 +7,79 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: This is just a place holder for now.
+// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//
-
def SI_Itin : ProcessorItineraries <[], [], []>;
+
+
+def WriteBranch : SchedWrite;
+def WriteExport : SchedWrite;
+def WriteLDS : SchedWrite;
+def WriteSALU : SchedWrite;
+def WriteSMEM : SchedWrite;
+def WriteVMEM : SchedWrite;
+
+// Vector ALU instructions
+def WriteDouble : SchedWrite;
+def WriteDoubleAdd : SchedWrite;
+def WriteFloat : SchedWrite;
+def WriteFloatFMA : SchedWrite;
+def WriteFloatTrans : SchedWrite;
+def WriteInt : SchedWrite;
+def WriteIntMUL : SchedWrite;
+def WriteConversion : SchedWrite;
+def WriteI24 : SchedWrite;
+
+def SIModel : SchedMachineModel;
+
+
+
+// BufferSize = 0 means the processors are in-order.
+let BufferSize = 0 in {
+
+// XXX: Are the resource counts correct?
+def HWBranch : ProcResource<1>;
+def HWExport : ProcResource<7>; // Taken from S_WAITCNT
+def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
+def HWSALU : ProcResource<1>;
+def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
+def HWVALU : ProcResource<1>;
+
+}
+
+let SchedModel = SIModel in {
+
+class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources, // Associates `write` with `resources` through WriteRes.
+ int cycles> : WriteRes<write, resources> { // `cycles` is the single value placed in ResourceCycles below.
+ let ResourceCycles = [cycles]; // NOTE(review): one-element list and Latency is not set here - confirm `resources` always has exactly one entry and that leaving Latency unset is intended.
+}
+
+class HWVALUWriteRes<SchedWrite write, int cycles> : // Shorthand for an HWWriteRes whose only resource is HWVALU.
+ HWWriteRes<write, [HWVALU], cycles>; // `write` and `cycles` are forwarded unchanged.
+
+// The cycle counts are taken from the AMD Accelerated Parallel Processing
+// guide. They may not be accurate.
+
+def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
+def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
+def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
+def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
+def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+
+// XXX: These definitions assume full double-precision speed, some devices are
+// slower. These are also taken from the AMD Accelerated Parallel Processing
+// guide and may not be accurate.
+def : HWVALUWriteRes<WriteDouble, 4>;
+def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+def : HWVALUWriteRes<WriteFloat, 1>;
+def : HWVALUWriteRes<WriteFloatFMA, 1>;
+def : HWVALUWriteRes<WriteFloatTrans, 4>;
+def : HWVALUWriteRes<WriteInt, 1>;
+def : HWVALUWriteRes<WriteIntMUL, 4>;
+def : HWVALUWriteRes<WriteConversion, 4>;
+def : HWVALUWriteRes<WriteI24, 1>;
+
+} // End SchedModel = SIModel