diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-07-19 11:50:00 -0700 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-05-30 14:18:17 -0400 |
commit | 401e16b2908e8673951d4ac56777833cdbb50b4a (patch) | |
tree | f1c8b740ed9060304dbb89d0f93f31d1613cb83c | |
parent | fefc8fe2c8a9a02c76141c9bc8278560d9389311 (diff) |
R600/SI: Define a schedule model and enable the generic machine scheduler [si-scheduler-v2]
The schedule model is not complete yet, and could be improved.
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.cpp | 9 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.h | 6 | ||||
-rw-r--r-- | lib/Target/R600/Processors.td | 22 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrFormats.td | 18 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 97 | ||||
-rw-r--r-- | lib/Target/R600/SISchedule.td | 74 |
6 files changed, 208 insertions, 18 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index f3b993204a5..4811fdc5ec8 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; @@ -110,3 +111,11 @@ std::string AMDGPUSubtarget::getDeviceName() const { return DevName; } + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (getGeneration() >= SOUTHERN_ISLANDS) + Policy.OnlyTopDown = true; +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 1b041d6bd2b..34c9550a260 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -92,9 +92,13 @@ public: bool hasCFAluBug() const; bool enableMachineScheduler() const override { - return getGeneration() <= NORTHERN_ISLANDS; + return true; } + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, MachineInstr *end, + unsigned NumRegionInstrs) const override; + // Helper functions to simplify if statements bool isTargetELF() const; std::string getDeviceName() const; diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index ce17d7cb7f1..e4ca37d84b5 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -83,28 +83,28 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIModel, [FeatureSouthernIslands]>; -def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"tahiti", SIModel, [FeatureSouthernIslands]>; -def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; +def : 
ProcessorModel<"pitcairn", SIModel, [FeatureSouthernIslands]>; -def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIModel, [FeatureSouthernIslands]>; -def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIModel, [FeatureSouthernIslands]>; -def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIModel, [FeatureSouthernIslands]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"bonaire", SIModel, [FeatureSeaIslands]>; -def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kabini", SIModel, [FeatureSeaIslands]>; -def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kaveri", SIModel, [FeatureSeaIslands]>; -def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIModel, [FeatureSeaIslands]>; -def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"mullins", SIModel, [FeatureSeaIslands]>; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 168eff25bb2..388567e966c 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -35,6 +35,8 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : let TSFlags{7} = VOP3; let TSFlags{8} = VOPC; let TSFlags{9} = SALU; + + let SchedRW = [WriteInt]; } class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : @@ -55,6 +57,8 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : // Scalar operations //===----------------------------------------------------------------------===// +let SchedRW = [WriteSALU] in { + class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : Enc32<outs, ins, asm, pattern> { @@ -144,6 +148,10 @@ 
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < let SALU = 1; } +} // let SchedRW = [WriteSALU] + +let SchedRW = [WriteSMEM] in { + class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm, list<dag> pattern> : Enc32<outs, ins, asm, pattern> { @@ -162,6 +170,8 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm, let SMRD = 1; } +} // SchedRW = [WriteSMEM] + //===----------------------------------------------------------------------===// // Vector ALU operations //===----------------------------------------------------------------------===// @@ -343,6 +353,8 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : let LGKM_CNT = 1; } +let SchedRW = [WriteVMEM] in { + class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : Enc64<outs, ins, asm, pattern> { @@ -456,6 +468,10 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let MIMG = 1; } +} // let SchedRW = [WriteVMEM] + +let SchedRW = [WriteExport] in { + def EXP : Enc64< (outs), (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, @@ -487,4 +503,6 @@ def EXP : Enc64< let EXP_CNT = 1; } +} // let SchedRW = [WriteExport] + } // End Uses = [EXEC] diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index ed4ad215028..826136ce70c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -928,12 +928,14 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; -let neverHasSideEffects = 1, isMoveImm = 1 in { +let neverHasSideEffects = 1, isMoveImm = 1, SchedRW = [WriteFloat] in { defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; -} // End neverHasSideEffects = 1, isMoveImm = 1 +} // End neverHasSideEffects = 1, isMoveImm = 1, SchedRW = [WriteFloat] let Uses = [EXEC] in { +// FIXME: Specify SchedRW for READFIRSTLANE+B32 + def V_READFIRSTLANE_B32 : VOP1 
< 0x00000002, (outs SReg_32:$vdst), @@ -944,6 +946,8 @@ def V_READFIRSTLANE_B32 : VOP1 < } +let SchedRW = [WriteConversion] in { + defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64", [(set i32:$dst, (fp_to_sint f64:$src0))] >; @@ -962,7 +966,14 @@ defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set i32:$dst, (fp_to_sint f32:$src0))] >; + +} // End SchedRW = [WriteConversion] + +// FIXME Specify SchedRW for V_MOV_FED_B32 defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; + +let SchedRW = [WriteConversion] in { + ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; //defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; @@ -985,6 +996,10 @@ defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32", [(set f64:$dst, (uint_to_fp i32:$src0))] >; +} // SchedRW = [WriteConversion] + +let SchedRW = [WriteFloat] in { + defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set f32:$dst, (AMDGPUfract f32:$src0))] >; @@ -1000,6 +1015,11 @@ defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", [(set f32:$dst, (ffloor f32:$src0))] >; + +} // SchedRW = [WriteFloat] + +let SchedRW = [WriteFloatTrans] in { + defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", [(set f32:$dst, (fexp2 f32:$src0))] >; @@ -1033,6 +1053,10 @@ defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", >; defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; + +} // SchedRW = [WriteFloatTrans] + +// FIXME: Specify SchedRW for the rest of the VOP1 instructions. 
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>; defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>; @@ -1053,6 +1077,8 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; // VINTRP Instructions //===----------------------------------------------------------------------===// +// FIXME: Specify SchedRW for VINTRP instructions. + def V_INTERP_P1_F32 : VINTRP < 0x00000000, (outs VReg_32:$dst), @@ -1087,6 +1113,8 @@ def V_INTERP_MOV_F32 : VINTRP < // VOP2 Instructions //===----------------------------------------------------------------------===// +// FIXME: Specify SchedRW for V_CNDMASK and V_*LANE_B32 + def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]", @@ -1122,6 +1150,7 @@ def V_WRITELANE_B32 : VOP2 < [] >; +let SchedRW = [WriteFloat] in { let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set f32:$dst, (fadd f32:$src0, f32:$src1))] @@ -1146,6 +1175,9 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", [(set f32:$dst, (fmul f32:$src0, f32:$src1))] >; +} // SchedRW = [WriteFloat] + +let SchedRW = [WriteI24] in { defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))] @@ -1156,6 +1188,9 @@ defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", >; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; +} // SchedRW = [WriteI24] + +//let SchedRW = [WriteFloat] in { defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))] @@ -1167,6 +1202,11 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; + +//} // SchedRW = [WriteFloat] + +let SchedRW = [WriteInt] in { + defm V_MIN_I32 : VOP2_32 
<0x00000011, "V_MIN_I32", [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>; defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", @@ -1205,8 +1245,11 @@ defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", [(set i32:$dst, (xor i32:$src0, i32:$src1))] >; +} // let SchedRW = [WriteInt] + } // End isCommutable = 1 +// FIXME: Specify SchedRW for these instructions: defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>; defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; @@ -1216,6 +1259,8 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; +let SchedRW = [WriteInt] in { + let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. @@ -1235,7 +1280,9 @@ defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32, "V_SUBB_U32">; } // End Uses = [VCC] } // End isCommutable = 1, Defs = [VCC] +} // SchedRW = [WriteInt] +// FIXME: Specify SchedRW for the rest of the VOP2 instructions. 
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; @@ -1252,10 +1299,17 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", let neverHasSideEffects = 1 in { +let SchedRW = [WriteFloat] in { + defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; + +} // SchedRW = [WriteFloat] + +let SchedRW = [WriteI24] in { + defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))] >; @@ -1263,33 +1317,53 @@ defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))] >; +} // SchedRW = [WriteI24] + } // End neverHasSideEffects +// FIXME: Specify SchedRW for V_CUBE* defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in { + +let SchedRW = [WriteInt] in { + defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>; defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>; -} - defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>; + +} // SchedRW = [WriteInt] +} // neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 + +let SchedRW = [WriteFloatFMA] in { defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] >; +} // SchedRW = [WriteFloatFMA] + +// FIXME: 
Specify SchedRW for V_FMA_F64 def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] >; //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; + +let SchedRW = [WriteInt] in { defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; +} // SchedRW = [WriteInt] + +let SchedRW = [WriteFloat] in { + defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; + +} //SchedRW = [WriteFloat] ////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; ////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; ////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; @@ -1307,6 +1381,8 @@ defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +let SchedRW = [WriteInt] in { + def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] >; @@ -1317,26 +1393,39 @@ def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", [(set i64:$dst, (sra i64:$src0, i32:$src1))] >; +} // SchedRW = [WriteInt] + let isCommutable = 1 in { +let SchedRW = [WriteDoubleAdd] in { def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; +} // SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteDouble] in { def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; +} //SchedRW = [WriteDouble] } // isCommutable = 1 +// FIXME: Specify SchedRW for V_LDEXP_F64 def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; let isCommutable = 1 in { +let SchedRW = [WriteIntMUL] in { + defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; defm V_MUL_HI_I32 : VOP3_32 
<0x0000016c, "V_MUL_HI_I32", []>; +} // SchedRW = [WriteIntMUL] + } // isCommutable = 1 +// FIXME: Specify SchedRW for the rest of VOP3 instructions. defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td index 28b65b82585..174974d3066 100644 --- a/lib/Target/R600/SISchedule.td +++ b/lib/Target/R600/SISchedule.td @@ -7,9 +7,79 @@ // //===----------------------------------------------------------------------===// // -// TODO: This is just a place holder for now. +// MachineModel definitions for Southern Islands (SI) // //===----------------------------------------------------------------------===// - def SI_Itin : ProcessorItineraries <[], [], []>; + + +def WriteBranch : SchedWrite; +def WriteExport : SchedWrite; +def WriteLDS : SchedWrite; +def WriteSALU : SchedWrite; +def WriteSMEM : SchedWrite; +def WriteVMEM : SchedWrite; + +// Vector ALU instructions +def WriteDouble : SchedWrite; +def WriteDoubleAdd : SchedWrite; +def WriteFloat : SchedWrite; +def WriteFloatFMA : SchedWrite; +def WriteFloatTrans : SchedWrite; +def WriteInt : SchedWrite; +def WriteIntMUL : SchedWrite; +def WriteConversion : SchedWrite; +def WriteI24 : SchedWrite; + +def SIModel : SchedMachineModel; + + + +// BufferSize = 0 means the processors are in-order. +let BufferSize = 0 in { + +// XXX: Are the resource counts correct? 
+def HWBranch : ProcResource<1>; +def HWExport : ProcResource<7>; // Taken from S_WAITCNT +def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT +def HWSALU : ProcResource<1>; +def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT +def HWVALU : ProcResource<1>; + +} + +let SchedModel = SIModel in { + +class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources, + int cycles> : WriteRes<write, resources> { + let ResourceCycles = [cycles]; +} + +class HWVALUWriteRes<SchedWrite write, int cycles> : + HWWriteRes<write, [HWVALU], cycles>; + +// The cycles numbers are taken from AMD Accelerated Parallel Processing +// guide. They may not be accurate. + +def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ??? +def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ??? +def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64 +def : HWWriteRes<WriteSALU, [HWSALU], 1>; +def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ??? +def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600 + +// XXX: These definitions assume full double-precision speed, some devices are +// slower. These are also taken from the AMD Accelerated Parallel Processing +// guide and may not be accurate. +def : HWVALUWriteRes<WriteDouble, 4>; +def : HWVALUWriteRes<WriteDoubleAdd, 2>; +def : HWVALUWriteRes<WriteFloat, 1>; +def : HWVALUWriteRes<WriteFloatFMA, 1>; +def : HWVALUWriteRes<WriteFloatTrans, 4>; +def : HWVALUWriteRes<WriteInt, 1>; +def : HWVALUWriteRes<WriteIntMUL, 4>; +def : HWVALUWriteRes<WriteConversion, 4>; +def : HWVALUWriteRes<WriteI24, 1>; + +} // End SchedModel = SIModel |