diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-07-19 11:50:00 -0700 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2015-01-08 20:07:31 -0800 |
commit | f11db5d5151e41e445cc8965f0d370414297ba23 (patch) | |
tree | 3bc3b43b7cff8dc845d4610f4f6ebc844d2745fc | |
parent | 260163c3963afa094645f3fe3d1ff731a81a380a (diff) |
R600/SI: Define a schedule model and enable the generic machine scheduler
The schedule model is not complete yet, and could be improved.
27 files changed, 302 insertions, 83 deletions
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index a80518807eb..3b73a310e01 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -19,8 +19,7 @@ #include "SIInstrInfo.h" #include "SIISelLowering.h" #include "llvm/ADT/SmallString.h" - -#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; @@ -115,3 +114,21 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { case SEA_ISLANDS: return 12; } } + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (getGeneration() >= SOUTHERN_ISLANDS) { + + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us less + // register spills than just using one of these approaches on its own. + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 0642b4ad5c8..ff1b8b1df68 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -203,9 +203,13 @@ public: unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { - return getGeneration() <= NORTHERN_ISLANDS; + return true; } + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, MachineInstr *end, + unsigned NumRegionInstrs) const override; + // Helper functions to simplify if statements bool isTargetELF() const { return false; @@ -224,6 +228,13 @@ public: bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; } + unsigned getMaxWavesPerCU() const { + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 10; + + // FIXME: Not sure what this is for other subtagets. + llvm_unreachable("do not know max waves per CU for this subtarget."); + } }; } // End namespace llvm diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index fa78ffcbde0..cff97cdb3be 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -83,34 +83,38 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>; -def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"tonga", SI_Itin, [FeatureVolcanicIslands]>; +//===----------------------------------------------------------------------===// +// Volcanic Islands +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>; -def : Proc<"iceland", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; -def : Proc<"carrizo", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index ff8db67a619..99a1df36c1f 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -68,6 +68,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : // Most instructions require adjustments after selection to satisfy // operand requirements. let hasPostISelHook = 1; + let SchedRW = [Write32Bit]; } class Enc32 { @@ -214,9 +215,9 @@ class SMRDe <bits<5> op, bits<1> imm> : Enc32 { let Inst{31-27} = 0x18; //encoding } +let SchedRW = [WriteSALU] in { class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> : InstSI<outs, ins, asm, pattern> { - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -274,6 +275,8 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> : let UseNamedOperandTable = 1; } +} // let SchedRW = [WriteSALU] + class SMRD <dag outs, dag ins, string asm, list<dag> pattern> : InstSI<outs, ins, asm, pattern> { @@ -283,6 +286,7 @@ class SMRD <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 1; let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteSMEM]; } //===----------------------------------------------------------------------===// @@ -588,6 +592,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let DS = 1; let UseNamedOperandTable = 1; let DisableEncoding = "$m0"; + let SchedRW = [WriteLDS]; } class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -602,6 +607,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> : @@ -613,6 +619,7 @@ class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -641,5 +648,4 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : } - } // End Uses = [EXEC] diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4941d6491b1..0fe2b79a162 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1191,6 +1191,8 @@ defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>; let Uses = [EXEC] in { +// FIXME: Specify SchedRW for READFIRSTLANE_B32 + def V_READFIRSTLANE_B32 : VOP1 < 0x00000002, (outs SReg_32:$vdst), @@ -1201,6 +1203,8 @@ def V_READFIRSTLANE_B32 : VOP1 < } +let SchedRW = [WriteQuarterRate32] in { + defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64", VOP_I32_F64, fp_to_sint >; @@ -1253,6 +1257,9 @@ defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64", defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32", VOP_F64_I32, uint_to_fp >; + +} // let SchedRW = [WriteQuarterRate32] + defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32", VOP_F32_F32, AMDGPUfract >; @@ -1271,6 +1278,9 @@ defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32", defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32", VOP_F32_F32, fexp2 >; + +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32", VOP_F32_F32, flog2 >; @@ -1283,18 +1293,32 @@ defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32", defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32", VOP_F32_F32, AMDGPUrsq >; + +} //let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64", VOP_F64_F64, AMDGPUrcp >; defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64", VOP_F64_F64, AMDGPUrsq >; + +} // let SchedRW = [WriteDouble]; + defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32", VOP_F32_F32, fsqrt >; + +let SchedRW = [WriteDouble] in { + defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64", VOP_F64_F64, fsqrt >; + +} // let SchedRW = [WriteDouble] + defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32", VOP_F32_F32, AMDGPUsin >; @@ -1323,6 +1347,8 @@ defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32> // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>; defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>; @@ -1332,17 +1358,25 @@ defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32", defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; + +} // End let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>; defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamped >; +} // End SchedRW = [WriteDouble] + } // End SubtargetPredicate = isSICI //===----------------------------------------------------------------------===// // VINTRP Instructions //===----------------------------------------------------------------------===// +// FIXME: Specify SchedRW for VINTRP insturctions. defm V_INTERP_P1_F32 : VINTRP_m < 0x00000000, "v_interp_p1_f32", (outs VReg_32:$dst), @@ -1656,11 +1690,15 @@ defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32", defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; + +let SchedRW = [WriteDouble] in { + defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -// Only on SI +} // let SchedRW = [WriteDouble] + defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32, shl >; @@ -1675,6 +1713,7 @@ defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32, sra >; +let SchedRW = [WriteDouble] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64", @@ -1697,7 +1736,9 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -let isCommutable = 1 in { +} // let SchedRW = [WriteDouble] + +let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32", VOP_I32_I32_I32 @@ -1713,30 +1754,37 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32", VOP_I32_I32_I32 >; -} // isCommutable = 1 +} // isCommutable = 1, SchedRW = [WriteQuarterRate32] defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>; +let SchedRW = [WriteDouble] in { // Double precision division pre-scale. defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>; +} // let SchedRW = [WriteDouble] let isCommutable = 1 in { defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +let SchedRW = [WriteDouble] in { defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fmas >; +} // End SchedRW = [WriteDouble] } // End isCommutable = 1 //def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; +let SchedRW = [WriteDouble] in { defm V_TRIG_PREOP_F64 : VOP3Inst < vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop >; +} // let SchedRW = [WriteDouble] + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 9dddd1ccd69..672f316f1f1 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - return RC->getNumRegs(); +unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + + // FIXME: We should adjust the max number of waves based on LDS size. + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); + + for (regclass_iterator I = regclass_begin(), E = regclass_end(); + I != E; ++I) { + + unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned Limit; + + if (isSGPRClass(*I)) { + Limit = SGPRLimit / NumSubRegs; + } else { + Limit = VGPRLimit / NumSubRegs; + } + + const int *Sets = getRegClassPressureSets(*I); + assert(Sets); + for (unsigned i = 0; Sets[i] != -1; ++i) { + if (Sets[i] == (int)Idx) + return Limit; + } + } + return 256; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { @@ -481,3 +504,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, return AMDGPU::NoRegister; } +unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 256; + } +} + +unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 7040fd0c3e9..ffd0d84c802 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "llvm/Support/Debug.h" namespace llvm { @@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; + unsigned getRegPressureSetLimit(unsigned Idx) const override; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; @@ -113,6 +113,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { unsigned getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const; + /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumVGPRsAllowed(unsigned WaveCount) const; + + /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumSGPRsAllowed(unsigned WaveCount) const; + unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td index 28b65b82585..9b1f676020b 100644 --- a/lib/Target/R600/SISchedule.td +++ b/lib/Target/R600/SISchedule.td @@ -7,9 +7,85 @@ // //===----------------------------------------------------------------------===// // -// TODO: This is just a place holder for now. +// MachineModel definitions for Southern Islands (SI) // //===----------------------------------------------------------------------===// +def WriteBranch : SchedWrite; +def WriteExport : SchedWrite; +def WriteLDS : SchedWrite; +def WriteSALU : SchedWrite; +def WriteSMEM : SchedWrite; +def WriteVMEM : SchedWrite; -def SI_Itin : ProcessorItineraries <[], [], []>; +// Vector ALU instructions +def Write32Bit : SchedWrite; +def WriteQuarterRate32 : SchedWrite; + +def WriteFloatFMA : SchedWrite; + +def WriteDouble : SchedWrite; +def WriteDoubleAdd : SchedWrite; + +def SIFullSpeedModel : SchedMachineModel; +def SIQuarterSpeedModel : SchedMachineModel; + +// BufferSize = 0 means the processors are in-order. +let BufferSize = 0 in { + +// XXX: Are the resource counts correct? +def HWBranch : ProcResource<1>; +def HWExport : ProcResource<7>; // Taken from S_WAITCNT +def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT +def HWSALU : ProcResource<1>; +def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT +def HWVALU : ProcResource<1>; + +} + +class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources, + int latency> : WriteRes<write, resources> { + let Latency = latency; +} + +class HWVALUWriteRes<SchedWrite write, int latency> : + HWWriteRes<write, [HWVALU], latency>; + + +// The latency numbers are taken from AMD Accelerated Parallel Processing +// guide. They may not be acurate. + +// The latency values are 1 / (operations / cycle) / 4. +multiclass SICommonWriteRes { + + def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ??? + def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ??? + def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64 + def : HWWriteRes<WriteSALU, [HWSALU], 1>; + def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ??? + def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600 + + def : HWVALUWriteRes<Write32Bit, 1>; + def : HWVALUWriteRes<WriteQuarterRate32, 4>; +} + + +let SchedModel = SIFullSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes<WriteFloatFMA, 1>; +def : HWVALUWriteRes<WriteDouble, 4>; +def : HWVALUWriteRes<WriteDoubleAdd, 2>; + +} // End SchedModel = SIFullSpeedModel + +let SchedModel = SIQuarterSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes<WriteFloatFMA, 16>; +def : HWVALUWriteRes<WriteDouble, 16>; +def : HWVALUWriteRes<WriteDoubleAdd, 8>; + +} // End SchedModel = SIQuarterSpeedModel diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll index 8a5a3b64cf6..1e28fefbef5 100644 --- a/test/CodeGen/R600/atomic_cmp_swap_local.ll +++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: +; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] ; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] @@ -18,11 +18,11 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7 ; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]] ; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]] +; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] ; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index a47bc876cb9..9cf2fe94fbb 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -38,11 +38,11 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali } ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: -; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: buffer_load_dword [[VAL1:v[0-9]+]], +; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 ; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] -; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 24834af2040..efd875e9317 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll index 77cd8eae402..107abe2fbbe 100644 --- a/test/CodeGen/R600/fceil64.ll +++ b/test/CodeGen/R600/fceil64.ll @@ -11,12 +11,12 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; FUNC-LABEL: {{^}}fceil_f64: ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 +; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 194d0aaa181..74491b85b1e 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -12,12 +12,12 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone ; CI: v_floor_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 +; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll index 6f95bf20f73..78833d7606d 100644 --- a/test/CodeGen/R600/fmax3.ll +++ b/test/CodeGen/R600/fmax3.ll @@ -3,9 +3,9 @@ declare float @llvm.maxnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmax3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -21,8 +21,8 @@ define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmax ; SI-LABEL: {{^}}test_fmax3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll index aeeed1c7dd3..94631a149c0 100644 --- a/test/CodeGen/R600/fmin3.ll +++ b/test/CodeGen/R600/fmin3.ll @@ -3,9 +3,9 @@ declare float @llvm.minnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmin3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -21,8 +21,8 @@ define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmin ; SI-LABEL: {{^}}test_fmin3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll index 04a87e37785..c6e8608c79d 100644 --- a/test/CodeGen/R600/fneg-fabs.f64.ll +++ b/test/CodeGen/R600/fneg-fabs.f64.ll @@ -56,8 +56,8 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) { } ; FUNC-LABEL: {{^}}fneg_fabs_f64: -; SI: s_load_dwordx2 ; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}} +; SI: s_load_dwordx2 ; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 ; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]] ; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]] diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll index 2c7217ef056..faf9b401b32 100644 --- a/test/CodeGen/R600/ftrunc.f64.ll +++ b/test/CodeGen/R600/ftrunc.f64.ll @@ -23,12 +23,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; CI: v_trunc_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 +; SI: cmp_lt_i32 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/R600/llvm.memcpy.ll index 9771062ed0d..23d392c61d5 100644 --- a/test/CodeGen/R600/llvm.memcpy.ll +++ b/test/CodeGen/R600/llvm.memcpy.ll @@ -6,39 +6,23 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 + ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 ; SI: ds_read_u8 - ; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_read_u8 @@ -65,6 +49,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 @@ -75,6 +67,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: s_endpgm diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll index 16d3173f369..78d747d7e72 100644 --- a/test/CodeGen/R600/local-atomics.ll +++ b/test/CodeGen/R600/local-atomics.ll @@ -4,8 +4,8 @@ ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] ; SI: buffer_store_dword [[RESULT]], @@ -30,8 +30,8 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] ; SI: buffer_store_dword [[RESULT]], diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll index 0e5a94f25cd..0b808571011 100644 --- a/test/CodeGen/R600/local-atomics64.ll +++ b/test/CodeGen/R600/local-atomics64.ll @@ -29,10 +29,10 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9 ; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] ; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] ; SI: buffer_store_dwordx2 [[RESULT]], diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index 5c77ad5e6ca..86edfde4e34 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -31,8 +31,8 @@ ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] [M0] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16 [M0] define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index b2f4a9ff05e..f6dcb388248 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll index a66ad0201bf..de60554a779 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/R600/smrd.ll @@ -37,10 +37,12 @@ entry: ; SMRD load with a 64-bit offset ; CHECK-LABEL: {{^}}smrd3: -; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4 -; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ; -; FIXME: We don't need to copy these values to VGPRs -; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] +; FIXME: There are too many copies here because we don't fold immediates +; through REG_SEQUENCE +; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ; +; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4 +; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] +; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] ; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; FIXME: We should be able to use s_load_dword here ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll index a4027178431..5a3c2ec9b03 100644 --- a/test/CodeGen/R600/valu-i1.ll +++ b/test/CodeGen/R600/valu-i1.ll @@ -81,7 +81,6 @@ exit: ; SI: buffer_store_dword ; SI: v_cmp_eq_i32_e32 vcc, ; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]] -; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} ; SI: s_andn2_b64 exec, exec, [[OR_SREG]] ; SI: s_cbranch_execnz BB2_3 @@ -124,8 +123,8 @@ exit: ; Clear exec bits for workitems that load -1s ; SI: BB3_3: -; SI: buffer_load_dword [[A:v[0-9]+]] ; SI: buffer_load_dword [[B:v[0-9]+]] +; SI: buffer_load_dword [[A:v[0-9]+]] ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll index b30f86ccd0d..c54b8dcbfaf 100644 --- a/test/CodeGen/R600/wait.ll +++ b/test/CodeGen/R600/wait.ll @@ -3,9 +3,8 @@ ; CHECK-LABEL: {{^}}main: ; CHECK: s_load_dwordx4 ; CHECK: s_load_dwordx4 -; CHECK: s_waitcnt lgkmcnt(0){{$}} -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}} +; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; CHECK: s_endpgm define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 { main_body: %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0 diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index b444b4ec08e..f7bc8757a2b 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -57,8 +57,8 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad } ; FUNC-LABEL: {{^}}v_xor_i1: -; SI: buffer_load_ubyte [[A:v[0-9]+]] ; SI: buffer_load_ubyte [[B:v[0-9]+]] +; SI: buffer_load_ubyte [[A:v[0-9]+]] ; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[B]], [[A]] ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]] ; SI: buffer_store_byte [[RESULT]] diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll index 7df4b48c2b9..f22b8a29254 100644 --- a/test/CodeGen/R600/zero_extend.ll +++ b/test/CodeGen/R600/zero_extend.ll @@ -29,9 +29,9 @@ entry: } ; SI-CHECK-LABEL: {{^}}zext_i1_to_i64: +; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0 ; SI-CHECK: v_cmp_eq_i32 ; SI-CHECK: v_cndmask_b32 -; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0 define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 |