diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-11-25 00:47:33 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-11-25 00:47:33 +0000 |
commit | 078efa769933f1cc569b128bfad82170378d2cf8 (patch) | |
tree | 502bb132fb9f61639cbf604c1aad4614a9b3e093 | |
parent | 9a0cdc839c830314970912d9d1700b7a5b698a15 (diff) |
XXX: Load clustering fix.
-rw-r--r-- | lib/Target/R600/SIInstrInfo.cpp | 51 |
1 files changed, 41 insertions, 10 deletions
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 1a0010c03dc..07d4f869bb2 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -272,20 +272,51 @@ bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, unsigned Opc0 = FirstLdSt->getOpcode(); unsigned Opc1 = SecondLdSt->getOpcode(); - // TODO: This needs finer tuning - if (NumLoads > 4) - return false; + const MachineOperand *FirstDst = nullptr; + const MachineOperand *SecondDst = nullptr; - if (isDS(Opc0) && isDS(Opc1)) - return true; + if (isDS(Opc0) && isDS(Opc1)) { + FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst); + SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst); + } - if (isSMRD(Opc0) && isSMRD(Opc1)) - return true; + if (isSMRD(Opc0) && isSMRD(Opc1)) { + FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::dst); + SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::dst); + } - if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) - return true; + if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { + FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata); + SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata); + } - return false; + if (!FirstDst || !SecondDst) + return false; + + // TODO: This needs finer tuning + unsigned LoadClusterThreshold = 4; + + // Check if these two loads are writing to different sub-registers of the + // same super-register. + if (FirstDst->getReg() == SecondDst->getReg() && + FirstDst->getSubReg() != SecondDst->getSubReg()) { + // These loads share a super-register. We want to cluster them, because + // LLVM does not currently support sub-register liveness tracking, so + // if these two loads end up far apart, the live range of the super + // register could be very big. This can be especially bad when we have a + // kernel that does several large vector loads. 
+ const MachineRegisterInfo &MRI = + FirstLdSt->getParent()->getParent()->getRegInfo(); + const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); + const TargetRegisterClass *SubRC = FirstDst->getSubReg() ? + RI.getSubClassWithSubReg(DstRC, FirstDst->getSubReg()) : DstRC; + + LoadClusterThreshold = + std::max(LoadClusterThreshold, DstRC->getSize() / SubRC->getSize()); + + } + + return NumLoads <= LoadClusterThreshold; } void |