From b3427e7518fa0b3441a6c05c5969c666460fda10 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Tue, 18 Nov 2014 11:45:28 -0500
Subject: R600/SI: Add SIFoldOperands pass

This pass attempts to fold the source operands of mov and copy
instructions into their uses.
---
 lib/Target/R600/AMDGPU.h                |   4 +
 lib/Target/R600/AMDGPUTargetMachine.cpp |   1 +
 lib/Target/R600/CMakeLists.txt          |   1 +
 lib/Target/R600/SIFoldOperands.cpp      | 214 ++++++++++++++++++++++++++++++++
 test/CodeGen/R600/operand-folding.ll    |  40 +++++++
 5 files changed, 260 insertions(+)
 create mode 100644 lib/Target/R600/SIFoldOperands.cpp
 create mode 100644 test/CodeGen/R600/operand-folding.ll

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 261075e1e95..13379e7e255 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -38,6 +38,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
 // SI Passes
 FunctionPass *createSITypeRewriter();
 FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSIFoldOperandsPass();
 FunctionPass *createSILowerI1CopiesPass();
 FunctionPass *createSIShrinkInstructionsPass();
 FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
@@ -47,6 +48,9 @@ FunctionPass *createSIFixSGPRLiveRangesPass();
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 FunctionPass *createSIInsertWaits(TargetMachine &tm);
 
+void initializeSIFoldOperandsPass(PassRegistry &);
+extern char &SIFoldOperandsID;
+
 void initializeSILowerI1CopiesPass(PassRegistry &);
 extern char &SILowerI1CopiesID;
 
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 6f8ce1ad47b..b560a1b1800 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -152,6 +152,7 @@ AMDGPUPassConfig::addPreISel() {
 bool AMDGPUPassConfig::addInstSelector() {
   addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
   addPass(createSILowerI1CopiesPass());
+  addPass(createSIFoldOperandsPass());
   return false;
 }
 
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index ed0a21684dc..3b703e72943 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -43,6 +43,7 @@ add_llvm_target(R600CodeGen
   SIAnnotateControlFlow.cpp
   SIFixSGPRCopies.cpp
   SIFixSGPRLiveRanges.cpp
+  SIFoldOperands.cpp
   SIInsertWaits.cpp
   SIInstrInfo.cpp
   SIISelLowering.cpp
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
new file mode 100644
index 00000000000..517f49d47ae
--- /dev/null
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -0,0 +1,214 @@
+//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
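+/// \brief Fold the source operands of mov and copy instructions into their
+/// uses.
+///
+/// Illustrative sketch (made-up vreg numbers, not output from an actual run):
+///   %vreg1 = V_MOV_B32_e32 5
+///   %vreg2 = V_ADD_I32_e32 %vreg1, %vreg0
+/// after folding becomes:
+///   %vreg2 = V_ADD_I32_e32 5, %vreg0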
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-fold-operands"
+using namespace llvm;
+
+namespace {
+
+class SIFoldOperands : public MachineFunctionPass {
+public:
+  static char ID;
+
+public:
+  SIFoldOperands() : MachineFunctionPass(ID) {
+    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "SI Fold Operands";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
+                      "SI Fold Operands", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
+                    "SI Fold Operands", false, false)
+
+char SIFoldOperands::ID = 0;
+
+char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+
+FunctionPass *llvm::createSIFoldOperandsPass() {
+  return new SIFoldOperands();
+}
+
+static bool isSafeToFold(unsigned Opcode) {
+  switch(Opcode) {
+  case AMDGPU::V_MOV_B32_e32:
+  case AMDGPU::V_MOV_B32_e64:
+  case AMDGPU::S_MOV_B32:
+  case AMDGPU::S_MOV_B64:
+  case AMDGPU::COPY:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static bool updateOperand(MachineInstr *MI, unsigned OpNo,
+                          const MachineOperand &New,
+                          const TargetRegisterInfo &TRI) {
+  MachineOperand &Old = MI->getOperand(OpNo);
+  assert(Old.isReg());
+
+  if (New.isImm()) {
+    Old.ChangeToImmediate(New.getImm());
+    return true;
+  }
+
+  if (New.isFPImm()) {
+    Old.ChangeToFPImmediate(New.getFPImm());
+    return true;
+  }
+
+  if (New.isReg()) {
+    if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
+        TargetRegisterInfo::isVirtualRegister(New.getReg())) {
+      Old.substVirtReg(New.getReg(), New.getSubReg(), TRI);
+      return true;
+    }
+  }
+
+  // FIXME: Handle physical registers.
+
+  return false;
+}
+
+bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  std::vector<unsigned> I1Defs;
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    MachineBasicBlock::iterator I, Next;
+    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+      Next = std::next(I);
+      MachineInstr &MI = *I;
+
+      if (!isSafeToFold(MI.getOpcode()))
+        continue;
+
+      const MachineOperand &OpToFold = MI.getOperand(1);
+
+      // FIXME: Fold operands with subregs.
+      if (OpToFold.isReg() &&
+          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
+           OpToFold.getSubReg()))
+        continue;
+
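+      // Collect all legal folds first and only apply them after the use scan,
+      // so the use list is not modified while it is being iterated.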
+      std::vector<std::pair<MachineInstr *, unsigned>> FoldList;
+      for (MachineRegisterInfo::use_iterator
+           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
+           Use != E; ++Use) {
+
+        MachineInstr *UseMI = Use->getParent();
+
+        // FIXME: Fold operands with subregs.
+        if (UseMI->getOperand(Use.getOperandNo()).isReg() &&
+            UseMI->getOperand(Use.getOperandNo()).getSubReg()) {
+          continue;
+        }
+
+        // In order to fold immediates into copies, we need to change the
+        // copy to a MOV.
+        if ((OpToFold.isImm() || OpToFold.isFPImm()) &&
+            UseMI->getOpcode() == AMDGPU::COPY) {
+          const TargetRegisterClass *TRC =
+              MRI.getRegClass(UseMI->getOperand(0).getReg());
+          unsigned CopyOp;
+          if (TRI->getCommonSubClass(TRC, &AMDGPU::VReg_32RegClass)) {
+            CopyOp = AMDGPU::V_MOV_B32_e64;
+          } else if (TRI->getCommonSubClass(TRC, &AMDGPU::SReg_32RegClass)) {
+            CopyOp = AMDGPU::S_MOV_B32;
+          } else if (TRI->getCommonSubClass(TRC, &AMDGPU::SReg_64RegClass)) {
+            CopyOp = AMDGPU::S_MOV_B64;
+          } else {
+            continue;
+          }
+          UseMI->setDesc(TII->get(CopyOp));
+        }
+
+        if (UseMI->getOpcode() == AMDGPU::COPY)
+          continue;
+
+        const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+        // Don't fold into target independent nodes.  Target independent
+        // opcodes don't have defined register classes.
+        if (UseDesc.isVariadic() ||
+            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
+          continue;
+
+        // Normal substitution
+        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
+          FoldList.push_back(std::make_pair(UseMI, Use.getOperandNo()));
+          continue;
+        }
+
+        // FIXME: We could commute the instruction to create more
+        // opportunities for folding.  This will only be useful if we have
+        // 32-bit instructions.
+
+        // FIXME: We could try to change the instruction from 64-bit to 32-bit
+        // to enable more folding opportunities.  The shrink operands pass
+        // already does this.
+      }
+
+      for (std::pair<MachineInstr *, unsigned> Fold : FoldList) {
+        if (updateOperand(Fold.first, Fold.second, OpToFold, *TRI)) {
+          DEBUG(dbgs() << "Folded OpNo " << Fold.second << " from " << MI <<
+                " into " << *Fold.first << '\n');
+        }
+      }
+    }
+  }
+  return false;
+}
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
new file mode 100644
index 00000000000..05177b475b1
--- /dev/null
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: {{^}}fold_sgpr:
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+entry:
+  %tmp0 = icmp ne i32 %fold, 0
+  br i1 %tmp0, label %if, label %endif
+
+if:
+  %id = call i32 @llvm.r600.read.tidig.x()
+  %offset = add i32 %fold, %id
+  %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+  store i32 0, i32 addrspace(1)* %tmp1
+  br label %endif
+
+endif:
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fold_imm:
+; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
+define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+entry:
+  %fold = add i32 3, 2
+  %tmp0 = icmp ne i32 %cmp, 0
+  br i1 %tmp0, label %if, label %endif
+
+if:
+  %id = call i32 @llvm.r600.read.tidig.x()
+  %val = or i32 %id, %fold
+  store i32 %val, i32 addrspace(1)* %out
+  br label %endif
+
+endif:
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+attributes #0 = { readnone }
-- 
cgit v1.2.3