diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-04-25 13:47:38 -0400 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-04-25 14:29:52 -0400 |
commit | 42272b8346de16d56a1426cee45704c5e4079217 (patch) | |
tree | 67213e243b619744467a0b2dd56895b3339768d0 | |
parent | b856d555b0336234cb4d5e2d77c3d506efa17550 (diff) |
AMDIL: Add core backend files for R600/SI codegen
70 files changed, 13364 insertions, 0 deletions
diff --git a/lib/Target/AMDIL/AMDGPU.h b/lib/Target/AMDIL/AMDGPU.h new file mode 100644 index 00000000000..eff002a5eae --- /dev/null +++ b/lib/Target/AMDIL/AMDGPU.h @@ -0,0 +1,47 @@ +//===-- AMDGPU.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_H +#define AMDGPU_H + +#include "AMDGPUTargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class FunctionPass; + class AMDGPUTargetMachine; + + FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); + FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm); + FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm); + + FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); + FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm); + FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm); + FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm); + FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); + + FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm); + + FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm); + FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm); + + FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm); + + FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); + + FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm); + +} /* End namespace llvm */ +#endif /* AMDGPU_H */ diff --git a/lib/Target/AMDIL/AMDGPUISelLowering.cpp b/lib/Target/AMDIL/AMDGPUISelLowering.cpp new file mode 
100644 index 00000000000..2c1052fd8ea --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUISelLowering.cpp @@ -0,0 +1,31 @@ +//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUISelLowering.h" +#include "AMDGPUUtil.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : + AMDILTargetLowering(TM) +{ +} + +void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI, + MachineFunction * MF, MachineRegisterInfo & MRI, + const struct TargetInstrInfo * TII, unsigned reg) const +{ + AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg()); +} + diff --git a/lib/Target/AMDIL/AMDGPUISelLowering.h b/lib/Target/AMDIL/AMDGPUISelLowering.h new file mode 100644 index 00000000000..3c5beb1cdae --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUISelLowering.h @@ -0,0 +1,35 @@ +//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUISELLOWERING_H +#define AMDGPUISELLOWERING_H + +#include "AMDILISelLowering.h" + +namespace llvm { + +class AMDGPUTargetLowering : public AMDILTargetLowering +{ +protected: + void addLiveIn(MachineInstr * MI, MachineFunction * MF, + MachineRegisterInfo & MRI, const struct TargetInstrInfo * TII, + unsigned reg) const; + +public: + AMDGPUTargetLowering(TargetMachine &TM); + +}; + +} /* End namespace llvm */ + +#endif /* AMDGPUISELLOWERING_H */ diff --git a/lib/Target/AMDIL/AMDGPUInstrInfo.cpp b/lib/Target/AMDIL/AMDGPUInstrInfo.cpp new file mode 100644 index 00000000000..4742283f688 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUInstrInfo.cpp @@ -0,0 +1,116 @@ +//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the TargetInstrInfo class that is +// common to all AMD GPUs. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUInstrInfo.h" +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm) + : AMDILInstrInfo(tm), TM(tm) +{ + const AMDILDevice * dev = TM.getSubtarget<AMDILSubtarget>().device(); + for (unsigned i = 0; i < AMDIL::INSTRUCTION_LIST_END; i++) { + const MCInstrDesc & instDesc = get(i); + uint32_t instGen = (instDesc.TSFlags >> 40) & 0x7; + uint32_t inst = (instDesc.TSFlags >> 48) & 0xffff; + if (inst == 0) { + continue; + } + switch (instGen) { + case AMDGPUInstrInfo::R600_CAYMAN: + if (dev->getGeneration() > AMDILDeviceInfo::HD6XXX) { + continue; + } + break; + case AMDGPUInstrInfo::R600: + if (dev->getGeneration() != AMDILDeviceInfo::HD4XXX) { + continue; + } + break; + case AMDGPUInstrInfo::EG_CAYMAN: + if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX + || dev->getGeneration() > AMDILDeviceInfo::HD6XXX) { + continue; + } + break; + case AMDGPUInstrInfo::CAYMAN: + if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) { + continue; + } + break; + case AMDGPUInstrInfo::SI: + if (dev->getGeneration() != AMDILDeviceInfo::HD7XXX) { + continue; + } + break; + default: + abort(); + break; + } + + unsigned amdilOpcode = GetRealAMDILOpcode(inst); + amdilToISA[amdilOpcode] = instDesc.Opcode; + } +} + +MachineInstr * AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const +{ + MachineInstrBuilder newInstr; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const AMDGPURegisterInfo & RI = getRegisterInfo(); + unsigned ISAOpcode = getISAOpcode(MI.getOpcode()); + + /* Create the new instruction */ + newInstr = BuildMI(MF, DL, TM.getInstrInfo()->get(ISAOpcode)); + + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + MachineOperand &MO = MI.getOperand(i); + /* Convert dst regclass to one that 
is supported by the ISA */ + if (MO.isReg() && MO.isDef()) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); + const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); + + assert(newRegClass); + + MRI.setRegClass(MO.getReg(), newRegClass); + } + } + /* Add the operand to the new instruction */ + newInstr.addOperand(MO); + } + + return newInstr; +} + +unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const +{ + if (amdilToISA.count(opcode) == 0) { + return opcode; + } else { + return amdilToISA.find(opcode)->second; + } +} + +bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const +{ + return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1; +} + +#include "AMDGPUInstrEnums.include" diff --git a/lib/Target/AMDIL/AMDGPUInstrInfo.h b/lib/Target/AMDIL/AMDGPUInstrInfo.h new file mode 100644 index 00000000000..fa009bc6302 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUInstrInfo.h @@ -0,0 +1,59 @@ +//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUINSTRUCTIONINFO_H_ +#define AMDGPUINSTRUCTIONINFO_H_ + +#include "AMDGPURegisterInfo.h" +#include "AMDILInstrInfo.h" + +#include <map> + +namespace llvm { + + class AMDGPUTargetMachine; + class MachineFunction; + class MachineInstr; + class MachineInstrBuilder; + + class AMDGPUInstrInfo : public AMDILInstrInfo { + private: + AMDGPUTargetMachine & TM; + std::map<unsigned, unsigned> amdilToISA; + + public: + explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm); + + virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; + + virtual unsigned getISAOpcode(unsigned AMDILopcode) const; + + virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const; + + bool isRegPreload(const MachineInstr &MI) const; + + #include "AMDGPUInstrEnums.h.include" + }; + +} // End llvm namespace + +/* AMDGPU target flags are stored in bits 32-39 */ +namespace AMDGPU_TFLAG_SHIFTS { + enum TFLAGS { + PRELOAD_REG = 32 + }; +} + + +#endif // AMDGPUINSTRUCTIONINFO_H_ diff --git a/lib/Target/AMDIL/AMDGPURegisterInfo.cpp b/lib/Target/AMDIL/AMDGPURegisterInfo.cpp new file mode 100644 index 00000000000..162a49116a0 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPURegisterInfo.cpp @@ -0,0 +1,24 @@ +//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +using namespace llvm; + +AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDILRegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } diff --git a/lib/Target/AMDIL/AMDGPURegisterInfo.h b/lib/Target/AMDIL/AMDGPURegisterInfo.h new file mode 100644 index 00000000000..f4492e9795d --- /dev/null +++ b/lib/Target/AMDIL/AMDGPURegisterInfo.h @@ -0,0 +1,38 @@ +//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUREGISTERINFO_H_ +#define AMDGPUREGISTERINFO_H_ + +#include "AMDILRegisterInfo.h" + +namespace llvm { + + class AMDGPUTargetMachine; + class TargetInstrInfo; + + struct AMDGPURegisterInfo : public AMDILRegisterInfo + { + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + + virtual const TargetRegisterClass * + getISARegClass(const TargetRegisterClass * rc) const = 0; + }; +} // End namespace llvm + +#endif // AMDGPUREGISTERINFO_H_ diff --git a/lib/Target/AMDIL/AMDGPUTargetMachine.cpp b/lib/Target/AMDIL/AMDGPUTargetMachine.cpp new file mode 100644 index 00000000000..313349ce01b --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUTargetMachine.cpp @@ -0,0 +1,161 @@ +//===-- AMDGPUTargetMachine.cpp - TODO: Add 
brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "AMDGPU.h" +#include "AMDILTargetMachine.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "R600KernelParameters.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OptLevel +) +: + AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), + Subtarget(TT, CPU, FS), + mDump(false) + +{ + /* TLInfo uses InstrInfo so it must be initialized after. 
*/ + if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + InstrInfo = new R600InstrInfo(*this); + TLInfo = new R600TargetLowering(*this); + } else { + InstrInfo = new SIInstrInfo(*this); + TLInfo = new SITargetLowering(*this); + } +} + +AMDGPUTargetMachine::~AMDGPUTargetMachine() +{ +} + +bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + bool DisableVerify) { + /* XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are + * only using it to access addPassesToGenerateCode() */ + bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType, + DisableVerify); + assert(fail); + + const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>(); + std::string gpu = STM.getDeviceName(); + if (gpu == "SI") { + PM.add(createSICodeEmitterPass(Out)); + } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + PM.add(createR600CodeEmitterPass(Out)); + } else { + abort(); + return true; + } + PM.add(createGCInfoDeleter()); + + return false; +} + +namespace { +class AMDGPUPassConfig : public TargetPassConfig { +public: + AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + AMDGPUTargetMachine &getAMDGPUTargetMachine() const { + return getTM<AMDGPUTargetMachine>(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // End of anonymous namespace + +TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { + return new AMDGPUPassConfig(this, PM); +} + +bool +AMDGPUPassConfig::addPreISel() +{ + const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); + if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + PM.add(createR600KernelParametersPass( + getAMDGPUTargetMachine().getTargetData())); + } + return false; +} + 
+bool AMDGPUPassConfig::addInstSelector() { + PM.add(createAMDILPeepholeOpt(*TM)); + PM.add(createAMDILISelDag(getAMDGPUTargetMachine())); + return false; +} + +bool AMDGPUPassConfig::addPreRegAlloc() { + const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); + + PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM)); + if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + PM.add(createR600LowerShaderInstructionsPass(*TM)); + PM.add(createR600LowerInstructionsPass(*TM)); + } else { + PM.add(createSILowerShaderInstructionsPass(*TM)); + PM.add(createSIAssignInterpRegsPass(*TM)); + } + PM.add(createAMDGPULowerInstructionsPass(*TM)); + PM.add(createAMDGPUConvertToISAPass(*TM)); + return false; +} + +bool AMDGPUPassConfig::addPostRegAlloc() { + return false; +} + +bool AMDGPUPassConfig::addPreSched2() { + return false; +} + +bool AMDGPUPassConfig::addPreEmitPass() { + const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); + PM.add(createAMDILCFGPreparationPass(*TM)); + PM.add(createAMDILCFGStructurizerPass(*TM)); + if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) { + PM.add(createSIPropagateImmReadsPass(*TM)); + } + + return false; +} + diff --git a/lib/Target/AMDIL/AMDGPUTargetMachine.h b/lib/Target/AMDIL/AMDGPUTargetMachine.h new file mode 100644 index 00000000000..d4165b09e84 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUTargetMachine.h @@ -0,0 +1,62 @@ +//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_TARGET_MACHINE_H +#define AMDGPU_TARGET_MACHINE_H + +#include "AMDGPUInstrInfo.h" +#include "AMDILTargetMachine.h" +#include "R600ISelLowering.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); + +class AMDGPUTargetMachine : public AMDILTargetMachine { + AMDILSubtarget Subtarget; + const AMDGPUInstrInfo * InstrInfo; + AMDGPUTargetLowering * TLInfo; + AMDILGlobalManager *mGM; + AMDILKernelManager *mKM; + bool mDump; + +public: + AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~AMDGPUTargetMachine(); + virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} + virtual const AMDILSubtarget *getSubtargetImpl() const {return &Subtarget; } + virtual const AMDGPURegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + virtual AMDGPUTargetLowering * getTargetLowering() const { + return TLInfo; + } + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + bool DisableVerify); +public: + void dumpCode() { mDump = true; } + bool shouldDumpCode() const { return mDump; } +}; + +} /* End namespace llvm */ + +#endif /* AMDGPU_TARGET_MACHINE_H */ diff --git a/lib/Target/AMDIL/AMDGPUUtil.cpp b/lib/Target/AMDIL/AMDGPUUtil.cpp new file mode 100644 index 00000000000..a5045436ab4 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUUtil.cpp @@ -0,0 +1,126 @@ +//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file 
is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUUtil.h" +#include "AMDGPURegisterInfo.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +/* Some instructions act as place holders to emulate operations that the GPU + * hardware does automatically. This function can be used to check if + * an opcode falls into this category. */ +bool llvm::isPlaceHolderOpcode(unsigned opcode) +{ + switch (opcode) { + default: return false; + case AMDIL::EXPORT_REG: + case AMDIL::RETURN: + case AMDIL::LOAD_INPUT: + case AMDIL::LAST: + case AMDIL::RESERVE_REG: + return true; + } +} + +bool llvm::isTransOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + + case AMDIL::COS_f32: + case AMDIL::COS_r600: + case AMDIL::COS_eg: + case AMDIL::RSQ_f32: + case AMDIL::FTOI: + case AMDIL::ITOF: + case AMDIL::MULLIT: + case AMDIL::MUL_LIT_r600: + case AMDIL::MUL_LIT_eg: + case AMDIL::SHR_i32: + case AMDIL::SIN_f32: + case AMDIL::EXP_f32: + case AMDIL::EXP_IEEE_r600: + case AMDIL::EXP_IEEE_eg: + case AMDIL::LOG_CLAMPED_r600: + case AMDIL::LOG_IEEE_r600: + case AMDIL::LOG_CLAMPED_eg: + case AMDIL::LOG_IEEE_eg: + case AMDIL::LOG_f32: + return true; + } +} + +bool llvm::isTexOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::TEX_SAMPLE: + case AMDIL::TEX_SAMPLE_C: + case AMDIL::TEX_SAMPLE_L: + case AMDIL::TEX_SAMPLE_C_L: + case AMDIL::TEX_SAMPLE_LB: + case AMDIL::TEX_SAMPLE_C_LB: + case AMDIL::TEX_SAMPLE_G: + case 
AMDIL::TEX_SAMPLE_C_G: + return true; + } +} + +bool llvm::isReductionOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::DOT4_r600: + case AMDIL::DOT4_eg: + return true; + } +} + +bool llvm::isFCOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::BREAK_LOGICALZ_f32: + case AMDIL::BREAK_LOGICALNZ_i32: + case AMDIL::BREAK_LOGICALZ_i32: + case AMDIL::CONTINUE_LOGICALNZ_f32: + case AMDIL::IF_LOGICALNZ_i32: + case AMDIL::IF_LOGICALZ_f32: + case AMDIL::ELSE: + case AMDIL::ENDIF: + case AMDIL::ENDLOOP: + case AMDIL::IF_LOGICALNZ_f32: + case AMDIL::WHILELOOP: + return true; + } +} + +void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, + const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) +{ + if (!MRI.isLiveIn(physReg)) { + MRI.addLiveIn(physReg, virtReg); + BuildMI(MF->front(), MF->front().begin(), DebugLoc(), + TII->get(TargetOpcode::COPY), virtReg) + .addReg(physReg); + } else { + MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); + } +} diff --git a/lib/Target/AMDIL/AMDGPUUtil.h b/lib/Target/AMDIL/AMDGPUUtil.h new file mode 100644 index 00000000000..299146e1ba7 --- /dev/null +++ b/lib/Target/AMDIL/AMDGPUUtil.h @@ -0,0 +1,49 @@ +//===-- AMDGPUUtil.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_UTIL_H +#define AMDGPU_UTIL_H + +#include "AMDGPURegisterInfo.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class AMDILMachineFunctionInfo; + +class TargetMachine; +class TargetRegisterInfo; + +bool isPlaceHolderOpcode(unsigned opcode); + +bool isTransOp(unsigned opcode); +bool isTexOp(unsigned opcode); +bool isReductionOp(unsigned opcode); +bool isFCOp(unsigned opcode); + +/* XXX: Move these to AMDGPUInstrInfo.h */ +#define MO_FLAG_CLAMP (1 << 0) +#define MO_FLAG_NEG (1 << 1) +#define MO_FLAG_ABS (1 << 2) +#define MO_FLAG_MASK (1 << 3) + +} /* End namespace llvm */ + +namespace AMDGPU { + +void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI, + const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg); + +} // End namespace AMDGPU + +#endif /* AMDGPU_UTIL_H */ diff --git a/lib/Target/AMDIL/AMDIL.h b/lib/Target/AMDIL/AMDIL.h new file mode 100644 index 00000000000..317ea124f66 --- /dev/null +++ b/lib/Target/AMDIL/AMDIL.h @@ -0,0 +1,280 @@ +//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AMDIL back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDIL_H_ +#define AMDIL_H_ + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +#define AMDIL_MAJOR_VERSION 2 +#define AMDIL_MINOR_VERSION 0 +#define AMDIL_REVISION_NUMBER 74 +#define ARENA_SEGMENT_RESERVED_UAVS 12 +#define DEFAULT_ARENA_UAV_ID 8 +#define DEFAULT_RAW_UAV_ID 7 +#define GLOBAL_RETURN_RAW_UAV_ID 11 +#define HW_MAX_NUM_CB 8 +#define MAX_NUM_UNIQUE_UAVS 8 +#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8 +#define OPENCL_MAX_READ_IMAGES 128 +#define OPENCL_MAX_WRITE_IMAGES 8 +#define OPENCL_MAX_SAMPLERS 16 + +// The next two values can never be zero, as zero is the ID that is +// used to assert against. +#define DEFAULT_LDS_ID 1 +#define DEFAULT_GDS_ID 1 +#define DEFAULT_SCRATCH_ID 1 +#define DEFAULT_VEC_SLOTS 8 + +// SC->CAL version matchings. +#define CAL_VERSION_SC_150 1700 +#define CAL_VERSION_SC_149 1700 +#define CAL_VERSION_SC_148 1525 +#define CAL_VERSION_SC_147 1525 +#define CAL_VERSION_SC_146 1525 +#define CAL_VERSION_SC_145 1451 +#define CAL_VERSION_SC_144 1451 +#define CAL_VERSION_SC_143 1441 +#define CAL_VERSION_SC_142 1441 +#define CAL_VERSION_SC_141 1420 +#define CAL_VERSION_SC_140 1400 +#define CAL_VERSION_SC_139 1387 +#define CAL_VERSION_SC_138 1387 +#define CAL_APPEND_BUFFER_SUPPORT 1340 +#define CAL_VERSION_SC_137 1331 +#define CAL_VERSION_SC_136 982 +#define CAL_VERSION_SC_135 950 +#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990 + +#define OCL_DEVICE_RV710 0x0001 +#define OCL_DEVICE_RV730 0x0002 +#define OCL_DEVICE_RV770 0x0004 +#define OCL_DEVICE_CEDAR 0x0008 +#define OCL_DEVICE_REDWOOD 0x0010 +#define OCL_DEVICE_JUNIPER 0x0020 +#define OCL_DEVICE_CYPRESS 0x0040 +#define OCL_DEVICE_CAICOS 0x0080 +#define OCL_DEVICE_TURKS 0x0100 +#define OCL_DEVICE_BARTS 0x0200 +#define OCL_DEVICE_CAYMAN 0x0400 +#define OCL_DEVICE_ALL 0x3FFF + +/// The number of function ID's that are reserved for +/// internal compiler usage. 
+const unsigned int RESERVED_FUNCS = 1024; + +#define AMDIL_OPT_LEVEL_DECL +#define AMDIL_OPT_LEVEL_VAR +#define AMDIL_OPT_LEVEL_VAR_NO_COMMA + +namespace llvm { +class AMDILInstrPrinter; +class AMDILTargetMachine; +class FunctionPass; +class MCAsmInfo; +class raw_ostream; +class Target; +class TargetMachine; + +/// Instruction selection passes. +FunctionPass* + createAMDILISelDag(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL); +FunctionPass* + createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + +/// Pre regalloc passes. +FunctionPass* + createAMDILMachinePeephole(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + +/// Pre emit passes. +FunctionPass* + createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); +FunctionPass* + createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + +extern Target TheAMDILTarget; +extern Target TheAMDGPUTarget; +} // end namespace llvm; + +#define GET_REGINFO_ENUM +#include "AMDILGenRegisterInfo.inc" +#define GET_INSTRINFO_ENUM +#include "AMDILGenInstrInfo.inc" + +/// Include device information enumerations +#include "AMDILDeviceInfo.h" + +namespace llvm { +/// OpenCL uses address spaces to differentiate between +/// various memory regions on the hardware. On the CPU +/// all of the address spaces point to the same memory, +/// however on the GPU, each address space points to +/// a seperate piece of memory that is unique from other +/// memory locations. +namespace AMDILAS { +enum AddressSpaces { + PRIVATE_ADDRESS = 0, // Address space for private memory. + GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). + CONSTANT_ADDRESS = 2, // Address space for constant memory. + LOCAL_ADDRESS = 3, // Address space for local memory. + REGION_ADDRESS = 4, // Address space for region memory. + ADDRESS_NONE = 5, // Address space for unknown memory. 
+ PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) + PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) + LAST_ADDRESS = 8 +}; + +// We are piggybacking on the CommentFlag enum in MachineInstr.h to +// set bits in AsmPrinterFlags of the MachineInstruction. We will +// start at bit 16 and allocate down while LLVM will start at bit +// 1 and allocate up. + +// This union/struct combination is an easy way to read out the +// exact bits that are needed. +typedef union ResourceRec { + struct { +#ifdef __BIG_ENDIAN__ + unsigned short isImage : 1; // Reserved for future use/llvm. + unsigned short ResourceID : 10; // Flag to specify the resourece ID for + // the op. + unsigned short HardwareInst : 1; // Flag to specify that this instruction + // is a hardware instruction. + unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a + // conflict. + unsigned short ByteStore : 1; // Flag to specify if the op is a byte + // store op. + unsigned short PointerPath : 1; // Flag to specify if the op is on the + // pointer path. + unsigned short CacheableRead : 1; // Flag to specify if the read is + // cacheable. +#else + unsigned short CacheableRead : 1; // Flag to specify if the read is + // cacheable. + unsigned short PointerPath : 1; // Flag to specify if the op is on the + // pointer path. + unsigned short ByteStore : 1; // Flag to specify if the op is byte + // store op. + unsigned short ConflictPtr : 1; // Flag to specify that the pointer has + // a conflict. + unsigned short HardwareInst : 1; // Flag to specify that this instruction + // is a hardware instruction. + unsigned short ResourceID : 10; // Flag to specify the resource ID for + // the op. + unsigned short isImage : 1; // Reserved for future use. 
+#endif + } bits; + unsigned short u16all; +} InstrResEnc; + +} // namespace AMDILAS + +// The OpSwizzle encodes a subset of all possible +// swizzle combinations into a number of bits using +// only the combinations utilized by the backend. +// The lower 128 are for source swizzles and the +// upper 128 or for destination swizzles. +// The valid mappings can be found in the +// getSrcSwizzle and getDstSwizzle functions of +// AMDILUtilityFunctions.cpp. +typedef union SwizzleRec { + struct { +#ifdef __BIG_ENDIAN__ + unsigned char dst : 1; + unsigned char swizzle : 7; +#else + unsigned char swizzle : 7; + unsigned char dst : 1; +#endif + } bits; + unsigned char u8all; +} OpSwizzle; +// Enums corresponding to AMDIL condition codes for IL. These +// values must be kept in sync with the ones in the .td file. +namespace AMDILCC { +enum CondCodes { + // AMDIL specific condition codes. These correspond to the IL_CC_* + // in AMDILInstrInfo.td and must be kept in the same order. + IL_CC_D_EQ = 0, // DEQ instruction. + IL_CC_D_GE = 1, // DGE instruction. + IL_CC_D_LT = 2, // DLT instruction. + IL_CC_D_NE = 3, // DNE instruction. + IL_CC_F_EQ = 4, // EQ instruction. + IL_CC_F_GE = 5, // GE instruction. + IL_CC_F_LT = 6, // LT instruction. + IL_CC_F_NE = 7, // NE instruction. + IL_CC_I_EQ = 8, // IEQ instruction. + IL_CC_I_GE = 9, // IGE instruction. + IL_CC_I_LT = 10, // ILT instruction. + IL_CC_I_NE = 11, // INE instruction. + IL_CC_U_GE = 12, // UGE instruction. + IL_CC_U_LT = 13, // ULE instruction. + // Pseudo IL Comparison instructions here. + IL_CC_F_GT = 14, // GT instruction. 
+ IL_CC_U_GT = 15, + IL_CC_I_GT = 16, + IL_CC_D_GT = 17, + IL_CC_F_LE = 18, // LE instruction + IL_CC_U_LE = 19, + IL_CC_I_LE = 20, + IL_CC_D_LE = 21, + IL_CC_F_UNE = 22, + IL_CC_F_UEQ = 23, + IL_CC_F_ULT = 24, + IL_CC_F_UGT = 25, + IL_CC_F_ULE = 26, + IL_CC_F_UGE = 27, + IL_CC_F_ONE = 28, + IL_CC_F_OEQ = 29, + IL_CC_F_OLT = 30, + IL_CC_F_OGT = 31, + IL_CC_F_OLE = 32, + IL_CC_F_OGE = 33, + IL_CC_D_UNE = 34, + IL_CC_D_UEQ = 35, + IL_CC_D_ULT = 36, + IL_CC_D_UGT = 37, + IL_CC_D_ULE = 38, + IL_CC_D_UGE = 39, + IL_CC_D_ONE = 40, + IL_CC_D_OEQ = 41, + IL_CC_D_OLT = 42, + IL_CC_D_OGT = 43, + IL_CC_D_OLE = 44, + IL_CC_D_OGE = 45, + IL_CC_U_EQ = 46, + IL_CC_U_NE = 47, + IL_CC_F_O = 48, + IL_CC_D_O = 49, + IL_CC_F_UO = 50, + IL_CC_D_UO = 51, + IL_CC_L_LE = 52, + IL_CC_L_GE = 53, + IL_CC_L_EQ = 54, + IL_CC_L_NE = 55, + IL_CC_L_LT = 56, + IL_CC_L_GT = 57, + IL_CC_UL_LE = 58, + IL_CC_UL_GE = 59, + IL_CC_UL_EQ = 60, + IL_CC_UL_NE = 61, + IL_CC_UL_LT = 62, + IL_CC_UL_GT = 63, + COND_ERROR = 64 +}; + +} // end namespace AMDILCC +} // end namespace llvm +#endif // AMDIL_H_ diff --git a/lib/Target/AMDIL/AMDIL7XXDevice.cpp b/lib/Target/AMDIL/AMDIL7XXDevice.cpp new file mode 100644 index 00000000000..e1b6b8fbb2d --- /dev/null +++ b/lib/Target/AMDIL/AMDIL7XXDevice.cpp @@ -0,0 +1,128 @@ +//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDIL7XXDevice.h" +#include "AMDILDevice.h" + +using namespace llvm; + +AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST) +{ + setCaps(); + std::string name = mSTM->getDeviceName(); + if (name == "rv710") { + mDeviceFlag = OCL_DEVICE_RV710; + } else if (name == "rv730") { + mDeviceFlag = OCL_DEVICE_RV730; + } else { + mDeviceFlag = OCL_DEVICE_RV770; + } +} + +AMDIL7XXDevice::~AMDIL7XXDevice() +{ +} + +void AMDIL7XXDevice::setCaps() +{ + mSWBits.set(AMDILDeviceInfo::LocalMem); +} + +size_t AMDIL7XXDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_700; + } + return 0; +} + +size_t AMDIL7XXDevice::getWavefrontSize() const +{ + return AMDILDevice::HalfWavefrontSize; +} + +uint32_t AMDIL7XXDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD4XXX; +} + +uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const +{ + switch (DeviceID) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case GLOBAL_ID: + case CONSTANT_ID: + case RAW_UAV_ID: + case ARENA_UAV_ID: + break; + case LDS_ID: + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } + break; + case SCRATCH_ID: + if (usesHardware(AMDILDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } + break; + case GDS_ID: + assert(0 && "GDS UAV ID is not supported on this chip"); + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } + break; + }; + + return 0; +} + +uint32_t AMDIL7XXDevice::getMaxNumUAVs() const +{ + return 1; +} + +AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST) +{ + setCaps(); +} + +AMDIL770Device::~AMDIL770Device() +{ +} + +void AMDIL770Device::setCaps() +{ + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mSWBits.set(AMDILDeviceInfo::FMA); + mHWBits.set(AMDILDeviceInfo::DoubleOps); + } + 
mSWBits.set(AMDILDeviceInfo::BarrierDetect); + mHWBits.reset(AMDILDeviceInfo::LongOps); + mSWBits.set(AMDILDeviceInfo::LongOps); + mSWBits.set(AMDILDeviceInfo::LocalMem); +} + +size_t AMDIL770Device::getWavefrontSize() const +{ + return AMDILDevice::WavefrontSize; +} + +AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) +{ +} + +AMDIL710Device::~AMDIL710Device() +{ +} + +size_t AMDIL710Device::getWavefrontSize() const +{ + return AMDILDevice::QuarterWavefrontSize; +} diff --git a/lib/Target/AMDIL/AMDIL7XXDevice.h b/lib/Target/AMDIL/AMDIL7XXDevice.h new file mode 100644 index 00000000000..4d8d47a8b33 --- /dev/null +++ b/lib/Target/AMDIL/AMDIL7XXDevice.h @@ -0,0 +1,71 @@ +//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===----------------------------------------------------------------------===// +#ifndef _AMDIL7XXDEVICEIMPL_H_ +#define _AMDIL7XXDEVICEIMPL_H_ +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { +class AMDILSubtarget; + +//===----------------------------------------------------------------------===// +// 7XX generation of devices and their respective sub classes +//===----------------------------------------------------------------------===// + +// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX +// devices are derived from this class. 
The AMDIL7XX device will only +// support the minimal features that are required to be considered OpenCL 1.0 +// compliant and nothing more. +class AMDIL7XXDevice : public AMDILDevice { +public: + AMDIL7XXDevice(AMDILSubtarget *ST); + virtual ~AMDIL7XXDevice(); + virtual size_t getMaxLDSSize() const; + virtual size_t getWavefrontSize() const; + virtual uint32_t getGeneration() const; + virtual uint32_t getResourceID(uint32_t DeviceID) const; + virtual uint32_t getMaxNumUAVs() const; + +protected: + virtual void setCaps(); +}; // AMDIL7XXDevice + +// The AMDIL770Device class represents the RV770 chip and it's +// derivative cards. The difference between this device and the base +// class is this device device adds support for double precision +// and has a larger wavefront size. +class AMDIL770Device : public AMDIL7XXDevice { +public: + AMDIL770Device(AMDILSubtarget *ST); + virtual ~AMDIL770Device(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; // AMDIL770Device + +// The AMDIL710Device class derives from the 7XX base class, but this +// class is a smaller derivative, so we need to overload some of the +// functions in order to correctly specify this information. +class AMDIL710Device : public AMDIL7XXDevice { +public: + AMDIL710Device(AMDILSubtarget *ST); + virtual ~AMDIL710Device(); + virtual size_t getWavefrontSize() const; +}; // AMDIL710Device + +} // namespace llvm +#endif // _AMDILDEVICEIMPL_H_ diff --git a/lib/Target/AMDIL/AMDILDevice.cpp b/lib/Target/AMDIL/AMDILDevice.cpp new file mode 100644 index 00000000000..aa6d8af7012 --- /dev/null +++ b/lib/Target/AMDIL/AMDILDevice.cpp @@ -0,0 +1,137 @@ +//===-- AMDILDevice.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +using namespace llvm; +// Default implementation for all of the classes. +AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST) +{ + mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities); + mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities); + setCaps(); + mDeviceFlag = OCL_DEVICE_ALL; +} + +AMDILDevice::~AMDILDevice() +{ + mHWBits.clear(); + mSWBits.clear(); +} + +size_t AMDILDevice::getMaxGDSSize() const +{ + return 0; +} + +uint32_t +AMDILDevice::getDeviceFlag() const +{ + return mDeviceFlag; +} + +size_t AMDILDevice::getMaxNumCBs() const +{ + if (usesHardware(AMDILDeviceInfo::ConstantMem)) { + return HW_MAX_NUM_CB; + } + + return 0; +} + +size_t AMDILDevice::getMaxCBSize() const +{ + if (usesHardware(AMDILDeviceInfo::ConstantMem)) { + return MAX_CB_SIZE; + } + + return 0; +} + +size_t AMDILDevice::getMaxScratchSize() const +{ + return 65536; +} + +uint32_t AMDILDevice::getStackAlignment() const +{ + return 16; +} + +void AMDILDevice::setCaps() +{ + mSWBits.set(AMDILDeviceInfo::HalfOps); + mSWBits.set(AMDILDeviceInfo::ByteOps); + mSWBits.set(AMDILDeviceInfo::ShortOps); + mSWBits.set(AMDILDeviceInfo::HW64BitDivMod); + if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) { + mSWBits.set(AMDILDeviceInfo::NoInline); + } + if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) { + mSWBits.set(AMDILDeviceInfo::MacroDB); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::ConstantMem); + } else { + mHWBits.set(AMDILDeviceInfo::ConstantMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::PrivateMem); + } else { + mHWBits.set(AMDILDeviceInfo::PrivateMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) { + mSWBits.set(AMDILDeviceInfo::BarrierDetect); + } + mSWBits.set(AMDILDeviceInfo::ByteLDSOps); + mSWBits.set(AMDILDeviceInfo::LongOps); +} + 
+AMDILDeviceInfo::ExecutionMode +AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const +{ + if (mHWBits[Caps]) { + assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDILDeviceInfo::Hardware; + } + + if (mSWBits[Caps]) { + assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDILDeviceInfo::Software; + } + + return AMDILDeviceInfo::Unsupported; + +} + +bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported; +} + +bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware; +} + +bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) == AMDILDeviceInfo::Software; +} + +std::string +AMDILDevice::getDataLayout() const +{ + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/lib/Target/AMDIL/AMDILDevice.h b/lib/Target/AMDIL/AMDILDevice.h new file mode 100644 index 00000000000..706dd82d6e2 --- /dev/null +++ b/lib/Target/AMDIL/AMDILDevice.h @@ -0,0 +1,116 @@ +//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. 
+//===----------------------------------------------------------------------===// +#ifndef _AMDILDEVICEIMPL_H_ +#define _AMDILDEVICEIMPL_H_ +#include "AMDIL.h" +#include "llvm/ADT/BitVector.h" + +namespace llvm { + class AMDILSubtarget; + class MCStreamer; +//===----------------------------------------------------------------------===// +// Interface for data that is specific to a single device +//===----------------------------------------------------------------------===// +class AMDILDevice { +public: + AMDILDevice(AMDILSubtarget *ST); + virtual ~AMDILDevice(); + + // Enum values for the various memory types. + enum { + RAW_UAV_ID = 0, + ARENA_UAV_ID = 1, + LDS_ID = 2, + GDS_ID = 3, + SCRATCH_ID = 4, + CONSTANT_ID = 5, + GLOBAL_ID = 6, + MAX_IDS = 7 + } IO_TYPE_IDS; + + // Returns the max LDS size that the hardware supports. Size is in + // bytes. + virtual size_t getMaxLDSSize() const = 0; + + // Returns the max GDS size that the hardware supports if the GDS is + // supported by the hardware. Size is in bytes. + virtual size_t getMaxGDSSize() const; + + // Returns the max number of hardware constant address spaces that + // are supported by this device. + virtual size_t getMaxNumCBs() const; + + // Returns the max number of bytes a single hardware constant buffer + // can support. Size is in bytes. + virtual size_t getMaxCBSize() const; + + // Returns the max number of bytes allowed by the hardware scratch + // buffer. Size is in bytes. + virtual size_t getMaxScratchSize() const; + + // Get the flag that corresponds to the device. + virtual uint32_t getDeviceFlag() const; + + // Returns the number of work-items that exist in a single hardware + // wavefront. + virtual size_t getWavefrontSize() const = 0; + + // Get the generational name of this specific device. + virtual uint32_t getGeneration() const = 0; + + // Get the stack alignment of this specific device. + virtual uint32_t getStackAlignment() const; + + // Get the resource ID for this specific device. 
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; + + // Get the max number of UAV's for this device. + virtual uint32_t getMaxNumUAVs() const = 0; + + + // API utilizing more detailed capabilities of each family of + // cards. If a capability is supported, then either usesHardware or + // usesSoftware returned true. If usesHardware returned true, then + // usesSoftware must return false for the same capability. Hardware + // execution means that the feature is done natively by the hardware + // and is not emulated by the softare. Software execution means + // that the feature could be done in the hardware, but there is + // software that emulates it with possibly using the hardware for + // support since the hardware does not fully comply with OpenCL + // specs. + bool isSupported(AMDILDeviceInfo::Caps Mode) const; + bool usesHardware(AMDILDeviceInfo::Caps Mode) const; + bool usesSoftware(AMDILDeviceInfo::Caps Mode) const; + virtual std::string getDataLayout() const; + static const unsigned int MAX_LDS_SIZE_700 = 16384; + static const unsigned int MAX_LDS_SIZE_800 = 32768; + static const unsigned int WavefrontSize = 64; + static const unsigned int HalfWavefrontSize = 32; + static const unsigned int QuarterWavefrontSize = 16; +protected: + virtual void setCaps(); + llvm::BitVector mHWBits; + llvm::BitVector mSWBits; + AMDILSubtarget *mSTM; + uint32_t mDeviceFlag; +private: + AMDILDeviceInfo::ExecutionMode + getExecutionMode(AMDILDeviceInfo::Caps Caps) const; +}; // AMDILDevice + +} // namespace llvm +#endif // _AMDILDEVICEIMPL_H_ diff --git a/lib/Target/AMDIL/AMDILDeviceInfo.cpp b/lib/Target/AMDIL/AMDILDeviceInfo.cpp new file mode 100644 index 00000000000..89b8312c294 --- /dev/null +++ b/lib/Target/AMDIL/AMDILDeviceInfo.cpp @@ -0,0 +1,87 @@ +//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#include "AMDILDevices.h" +#include "AMDILSubtarget.h" + +using namespace llvm; +namespace llvm { + AMDILDevice* +getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit) +{ + if (deviceName.c_str()[2] == '7') { + switch (deviceName.c_str()[3]) { + case '1': + return new AMDIL710Device(ptr); + case '7': + return new AMDIL770Device(ptr); + default: + return new AMDIL7XXDevice(ptr); + }; + } else if (deviceName == "cypress") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCypressDevice(ptr); + } else if (deviceName == "juniper") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILEvergreenDevice(ptr); + } else if (deviceName == "redwood") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILRedwoodDevice(ptr); + } else if (deviceName == "cedar") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCedarDevice(ptr); + } else if (deviceName == "barts" + || deviceName == "turks") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILNIDevice(ptr); + } else if (deviceName == "cayman") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This 
device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCaymanDevice(ptr); + } else if (deviceName == "caicos") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILNIDevice(ptr); + } else if (deviceName == "SI") { + return new AMDILSIDevice(ptr); + } else { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDIL7XXDevice(ptr); + } +} +} diff --git a/lib/Target/AMDIL/AMDILDeviceInfo.h b/lib/Target/AMDIL/AMDILDeviceInfo.h new file mode 100644 index 00000000000..c4acf9145ae --- /dev/null +++ b/lib/Target/AMDIL/AMDILDeviceInfo.h @@ -0,0 +1,89 @@ +//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#ifndef _AMDILDEVICEINFO_H_ +#define _AMDILDEVICEINFO_H_ + + +#include <string> + +namespace llvm +{ + class AMDILDevice; + class AMDILSubtarget; + namespace AMDILDeviceInfo + { + // Each Capabilities can be executed using a hardware instruction, + // emulated with a sequence of software instructions, or not + // supported at all. + enum ExecutionMode { + Unsupported = 0, // Unsupported feature on the card(Default value) + Software, // This is the execution mode that is set if the + // feature is emulated in software + Hardware // This execution mode is set if the feature exists + // natively in hardware + }; + + // Any changes to this needs to have a corresponding update to the + // twiki page GPUMetadataABI + enum Caps { + HalfOps = 0x1, // Half float is supported or not. 
+ DoubleOps = 0x2, // Double is supported or not. + ByteOps = 0x3, // Byte(char) is support or not. + ShortOps = 0x4, // Short is supported or not. + LongOps = 0x5, // Long is supported or not. + Images = 0x6, // Images are supported or not. + ByteStores = 0x7, // ByteStores available(!HD4XXX). + ConstantMem = 0x8, // Constant/CB memory. + LocalMem = 0x9, // Local/LDS memory. + PrivateMem = 0xA, // Scratch/Private/Stack memory. + RegionMem = 0xB, // OCL GDS Memory Extension. + FMA = 0xC, // Use HW FMA or SW FMA. + ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023. + MultiUAV = 0xE, // Use for UAV per Pointer 0-7. + Reserved0 = 0xF, // ReservedFlag + NoAlias = 0x10, // Cached loads. + Signed24BitOps = 0x11, // Peephole Optimization. + // Debug mode implies that no hardware features or optimizations + // are performned and that all memory access go through a single + // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). + Debug = 0x12, // Debug mode is enabled. + CachedMem = 0x13, // Cached mem is available or not. + BarrierDetect = 0x14, // Detect duplicate barriers. + Reserved1 = 0x15, // Reserved flag + ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available. + ArenaVectors = 0x17, // Flag to specify if vector loads from arena work. + TmrReg = 0x18, // Flag to specify if Tmr register is supported. + NoInline = 0x19, // Flag to specify that no inlining should occur. + MacroDB = 0x1A, // Flag to specify that backend handles macrodb. + HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod. + ArenaUAV = 0x1C, // Flag to specify that arena uav is supported. + PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's. + // If more capabilities are required, then + // this number needs to be increased. + // All capabilities must come before this + // number. + MaxNumberCapabilities = 0x20 + }; + // These have to be in order with the older generations + // having the lower number enumerations. 
+ enum Generation { + HD4XXX = 0, // 7XX based devices. + HD5XXX, // Evergreen based devices. + HD6XXX, // NI/Evergreen+ based devices. + HD7XXX, + HDTEST, // Experimental feature testing device. + HDNUMGEN + }; + + + } // namespace AMDILDeviceInfo + llvm::AMDILDevice* + getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); +} // namespace llvm +#endif // _AMDILDEVICEINFO_H_ diff --git a/lib/Target/AMDIL/AMDILDevices.h b/lib/Target/AMDIL/AMDILDevices.h new file mode 100644 index 00000000000..3fc5fa05669 --- /dev/null +++ b/lib/Target/AMDIL/AMDILDevices.h @@ -0,0 +1,19 @@ +//===-- AMDILDevices.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#ifndef __AMDIL_DEVICES_H_ +#define __AMDIL_DEVICES_H_ +// Include all of the device specific header files +// This file is for Internal use only! +#include "AMDIL7XXDevice.h" +#include "AMDILDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" +#include "AMDILSIDevice.h" + +#endif // _AMDIL_DEVICES_H_ diff --git a/lib/Target/AMDIL/AMDILEvergreenDevice.cpp b/lib/Target/AMDIL/AMDILEvergreenDevice.cpp new file mode 100644 index 00000000000..eb20beb8429 --- /dev/null +++ b/lib/Target/AMDIL/AMDILEvergreenDevice.cpp @@ -0,0 +1,183 @@ +//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDILEvergreenDevice.h" + +using namespace llvm; + +AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST) +: AMDILDevice(ST) { + setCaps(); + std::string name = ST->getDeviceName(); + if (name == "cedar") { + mDeviceFlag = OCL_DEVICE_CEDAR; + } else if (name == "redwood") { + mDeviceFlag = OCL_DEVICE_REDWOOD; + } else if (name == "cypress") { + mDeviceFlag = OCL_DEVICE_CYPRESS; + } else { + mDeviceFlag = OCL_DEVICE_JUNIPER; + } +} + +AMDILEvergreenDevice::~AMDILEvergreenDevice() { +} + +size_t AMDILEvergreenDevice::getMaxLDSSize() const { + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +size_t AMDILEvergreenDevice::getMaxGDSSize() const { + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const { + return 12; +} + +uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const { + switch(id) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case CONSTANT_ID: + case RAW_UAV_ID: + if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) { + return GLOBAL_RETURN_RAW_UAV_ID; + } else { + return DEFAULT_RAW_UAV_ID; + } + case GLOBAL_ID: + case ARENA_UAV_ID: + return DEFAULT_ARENA_UAV_ID; + case LDS_ID: + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case GDS_ID: + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case SCRATCH_ID: + if (usesHardware(AMDILDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + }; + return 0; +} + +size_t AMDILEvergreenDevice::getWavefrontSize() const { + return AMDILDevice::WavefrontSize; +} + +uint32_t AMDILEvergreenDevice::getGeneration() const { + return 
AMDILDeviceInfo::HD5XXX; +} + +void AMDILEvergreenDevice::setCaps() { + mSWBits.set(AMDILDeviceInfo::ArenaSegment); + mHWBits.set(AMDILDeviceInfo::ArenaUAV); + if (mSTM->calVersion() >= CAL_VERSION_SC_140) { + mHWBits.set(AMDILDeviceInfo::HW64BitDivMod); + mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod); + } + mSWBits.set(AMDILDeviceInfo::Signed24BitOps); + if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) { + mHWBits.set(AMDILDeviceInfo::ByteStores); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::LocalMem); + mSWBits.set(AMDILDeviceInfo::RegionMem); + } else { + mHWBits.set(AMDILDeviceInfo::LocalMem); + mHWBits.set(AMDILDeviceInfo::RegionMem); + } + mHWBits.set(AMDILDeviceInfo::Images); + if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) { + mHWBits.set(AMDILDeviceInfo::NoAlias); + } + if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) { + mHWBits.set(AMDILDeviceInfo::CachedMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) { + mHWBits.set(AMDILDeviceInfo::MultiUAV); + } + if (mSTM->calVersion() > CAL_VERSION_SC_136) { + mHWBits.set(AMDILDeviceInfo::ByteLDSOps); + mSWBits.reset(AMDILDeviceInfo::ByteLDSOps); + mHWBits.set(AMDILDeviceInfo::ArenaVectors); + } else { + mSWBits.set(AMDILDeviceInfo::ArenaVectors); + } + if (mSTM->calVersion() > CAL_VERSION_SC_137) { + mHWBits.set(AMDILDeviceInfo::LongOps); + mSWBits.reset(AMDILDeviceInfo::LongOps); + } + mHWBits.set(AMDILDeviceInfo::TmrReg); +} + +AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILCypressDevice::~AMDILCypressDevice() { +} + +void AMDILCypressDevice::setCaps() { + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mHWBits.set(AMDILDeviceInfo::DoubleOps); + mHWBits.set(AMDILDeviceInfo::FMA); + } +} + + +AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILCedarDevice::~AMDILCedarDevice() { +} + +void AMDILCedarDevice::setCaps() { + 
mSWBits.set(AMDILDeviceInfo::FMA); +} + +size_t AMDILCedarDevice::getWavefrontSize() const { + return AMDILDevice::QuarterWavefrontSize; +} + +AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILRedwoodDevice::~AMDILRedwoodDevice() +{ +} + +void AMDILRedwoodDevice::setCaps() { + mSWBits.set(AMDILDeviceInfo::FMA); +} + +size_t AMDILRedwoodDevice::getWavefrontSize() const { + return AMDILDevice::HalfWavefrontSize; +} diff --git a/lib/Target/AMDIL/AMDILEvergreenDevice.h b/lib/Target/AMDIL/AMDILEvergreenDevice.h new file mode 100644 index 00000000000..2639ab84024 --- /dev/null +++ b/lib/Target/AMDIL/AMDILEvergreenDevice.h @@ -0,0 +1,87 @@ +//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===----------------------------------------------------------------------===// +#ifndef _AMDILEVERGREENDEVICE_H_ +#define _AMDILEVERGREENDEVICE_H_ +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===----------------------------------------------------------------------===// +// Evergreen generation of devices and their respective sub classes +//===----------------------------------------------------------------------===// + + +// The AMDILEvergreenDevice is the base device class for all of the Evergreen +// series of cards. 
This class contains information required to differentiate +// the Evergreen device from the generic AMDILDevice. This device represents +// that capabilities of the 'Juniper' cards, also known as the HD57XX. +class AMDILEvergreenDevice : public AMDILDevice { +public: + AMDILEvergreenDevice(AMDILSubtarget *ST); + virtual ~AMDILEvergreenDevice(); + virtual size_t getMaxLDSSize() const; + virtual size_t getMaxGDSSize() const; + virtual size_t getWavefrontSize() const; + virtual uint32_t getGeneration() const; + virtual uint32_t getMaxNumUAVs() const; + virtual uint32_t getResourceID(uint32_t) const; +protected: + virtual void setCaps(); +}; // AMDILEvergreenDevice + +// The AMDILCypressDevice is similiar to the AMDILEvergreenDevice, except it has +// support for double precision operations. This device is used to represent +// both the Cypress and Hemlock cards, which are commercially known as HD58XX +// and HD59XX cards. +class AMDILCypressDevice : public AMDILEvergreenDevice { +public: + AMDILCypressDevice(AMDILSubtarget *ST); + virtual ~AMDILCypressDevice(); +private: + virtual void setCaps(); +}; // AMDILCypressDevice + + +// The AMDILCedarDevice is the class that represents all of the 'Cedar' based +// devices. This class differs from the base AMDILEvergreenDevice in that the +// device is a ~quarter of the 'Juniper'. These are commercially known as the +// HD54XX and HD53XX series of cards. +class AMDILCedarDevice : public AMDILEvergreenDevice { +public: + AMDILCedarDevice(AMDILSubtarget *ST); + virtual ~AMDILCedarDevice(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; // AMDILCedarDevice + +// The AMDILRedwoodDevice is the class the represents all of the 'Redwood' based +// devices. This class differs from the base class, in that these devices are +// considered about half of a 'Juniper' device. These are commercially known as +// the HD55XX and HD56XX series of cards. 
+class AMDILRedwoodDevice : public AMDILEvergreenDevice { +public: + AMDILRedwoodDevice(AMDILSubtarget *ST); + virtual ~AMDILRedwoodDevice(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; // AMDILRedwoodDevice + +} // namespace llvm +#endif // _AMDILEVERGREENDEVICE_H_ diff --git a/lib/Target/AMDIL/AMDILFrameLowering.cpp b/lib/Target/AMDIL/AMDILFrameLowering.cpp new file mode 100644 index 00000000000..87eca87e301 --- /dev/null +++ b/lib/Target/AMDIL/AMDILFrameLowering.cpp @@ -0,0 +1,53 @@ +//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface to describe a layout of a stack frame on a AMDIL target machine +// +//===----------------------------------------------------------------------===// +#include "AMDILFrameLowering.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +using namespace llvm; +AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl, + int LAO, unsigned TransAl) + : TargetFrameLowering(D, StackAl, LAO, TransAl) +{ +} + +AMDILFrameLowering::~AMDILFrameLowering() +{ +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. 
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI); +} + +const TargetFrameLowering::SpillSlot * +AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const +{ + NumEntries = 0; + return 0; +} +void +AMDILFrameLowering::emitPrologue(MachineFunction &MF) const +{ +} +void +AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const +{ +} +bool +AMDILFrameLowering::hasFP(const MachineFunction &MF) const +{ + return false; +} diff --git a/lib/Target/AMDIL/AMDILFrameLowering.h b/lib/Target/AMDIL/AMDILFrameLowering.h new file mode 100644 index 00000000000..b1d919ef524 --- /dev/null +++ b/lib/Target/AMDIL/AMDILFrameLowering.h @@ -0,0 +1,46 @@ +//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to describe a layout of a stack frame on a AMDIL target machine +// +//===----------------------------------------------------------------------===// +#ifndef _AMDILFRAME_LOWERING_H_ +#define _AMDILFRAME_LOWERING_H_ + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" + +/// Information about the stack frame layout on the AMDIL targets. It holds +/// the direction of the stack growth, the known stack alignment on entry to +/// each function, and the offset to the locals area. +/// See TargetFrameInfo for more comments. 
+ +namespace llvm { + class AMDILFrameLowering : public TargetFrameLowering { + public: + AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned + TransAl = 1); + virtual ~AMDILFrameLowering(); + virtual int getFrameIndexOffset(const MachineFunction &MF, + int FI) const; + virtual const SpillSlot * + getCalleeSavedSpillSlots(unsigned &NumEntries) const; + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + virtual bool hasFP(const MachineFunction &MF) const; + }; // class AMDILFrameLowering +} // namespace llvm +#endif // _AMDILFRAME_LOWERING_H_ diff --git a/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp b/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp new file mode 100644 index 00000000000..ff04d9d55bf --- /dev/null +++ b/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp @@ -0,0 +1,457 @@ +//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the AMDIL target. 
+// +//===----------------------------------------------------------------------===// +#include "AMDILDevices.h" +#include "AMDILTargetMachine.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions +// //for SelectionDAG operations. +// +namespace { +class AMDILDAGToDAGISel : public SelectionDAGISel { + // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can + // make the right decision when generating code for different targets. + const AMDILSubtarget &Subtarget; +public: + AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL); + virtual ~AMDILDAGToDAGISel(); + inline SDValue getSmallIPtrImm(unsigned Imm); + + SDNode *Select(SDNode *N); + // Complex pattern selectors + bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); + bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); + static bool isGlobalStore(const StoreSDNode *N); + static bool isPrivateStore(const StoreSDNode *N); + static bool isLocalStore(const StoreSDNode *N); + static bool isRegionStore(const StoreSDNode *N); + + static bool isCPLoad(const LoadSDNode *N); + static bool isConstantLoad(const LoadSDNode *N, int cbID); + static bool isGlobalLoad(const LoadSDNode *N); + static bool isPrivateLoad(const LoadSDNode *N); + static bool isLocalLoad(const LoadSDNode *N); + static bool isRegionLoad(const LoadSDNode *N); + + virtual const char *getPassName() const; +private: + SDNode *xformAtomicInst(SDNode *N); + + // 
Include the pieces autogenerated from the target description. +#include "AMDILGenDAGISel.inc" +}; +} // end anonymous namespace + +// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific +// DAG, ready for instruction scheduling. +// +FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM + AMDIL_OPT_LEVEL_DECL) { + return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR); +} + +AMDILDAGToDAGISel::AMDILDAGToDAGISel(AMDILTargetMachine &TM + AMDIL_OPT_LEVEL_DECL) + : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget<AMDILSubtarget>()) +{ +} + +AMDILDAGToDAGISel::~AMDILDAGToDAGISel() { +} + +SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); +} + +bool AMDILDAGToDAGISel::SelectADDRParam( + SDValue Addr, SDValue& R1, SDValue& R2) { + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + return true; +} + +bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + return SelectADDRParam(Addr, R1, R2); +} + + +bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + R2 = 
CurDAG->getTargetConstant(0, MVT::i64); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + return true; +} + +SDNode *AMDILDAGToDAGISel::Select(SDNode *N) { + unsigned int Opc = N->getOpcode(); + if (N->isMachineOpcode()) { + return NULL; // Already selected. + } + switch (Opc) { + default: break; + case ISD::FrameIndex: + { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { + unsigned int FI = FIN->getIndex(); + EVT OpVT = N->getValueType(0); + unsigned int NewOpc = AMDIL::MOVE_i32; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); + } + } + break; + } + // For all atomic instructions, we need to add a constant + // operand that stores the resource ID in the instruction + if (Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC) { + N = xformAtomicInst(N); + } + return SelectCode(N); +} + +bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { + return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); +} + +bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); +} + +bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { + if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { + return true; + } + MachineMemOperand *MMO = N->getMemOperand(); + const Value *V = MMO->getValue(); + const Value *BV = 
getBasePointerValue(V); + if (MMO + && MMO->getValue() + && ((V && dyn_cast<GlobalValue>(V)) + || (BV && dyn_cast<GlobalValue>( + getBasePointerValue(MMO->getValue()))))) { + return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); + } else { + return false; + } +} + +bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { + return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); +} + +bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { + MachineMemOperand *MMO = N->getMemOperand(); + if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (MMO) { + const Value *V = MMO->getValue(); + const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); + if (PSV && PSV == PseudoSourceValue::getConstantPool()) { + return true; + } + } + } + return false; +} + +bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { + if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + // Check to make sure we are not a constant pool load or a constant load + // that is marked as a private load + if (isCPLoad(N) || isConstantLoad(N, -1)) { + return false; + } + } + if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) + && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) + { + return true; + } + return false; +} + +const char *AMDILDAGToDAGISel::getPassName() const { + return "AMDIL DAG->DAG Pattern Instruction Selection"; +} + +SDNode* +AMDILDAGToDAGISel::xformAtomicInst(SDNode *N) +{ + uint32_t addVal = 1; + bool addOne = false; + // 
bool bitCastToInt = (N->getValueType(0) == MVT::f32); + unsigned opc = N->getOpcode(); + switch (opc) { + default: return N; + case AMDILISD::ATOM_G_ADD: + case AMDILISD::ATOM_G_AND: + case AMDILISD::ATOM_G_MAX: + case AMDILISD::ATOM_G_UMAX: + case AMDILISD::ATOM_G_MIN: + case AMDILISD::ATOM_G_UMIN: + case AMDILISD::ATOM_G_OR: + case AMDILISD::ATOM_G_SUB: + case AMDILISD::ATOM_G_RSUB: + case AMDILISD::ATOM_G_XCHG: + case AMDILISD::ATOM_G_XOR: + case AMDILISD::ATOM_G_ADD_NORET: + case AMDILISD::ATOM_G_AND_NORET: + case AMDILISD::ATOM_G_MAX_NORET: + case AMDILISD::ATOM_G_UMAX_NORET: + case AMDILISD::ATOM_G_MIN_NORET: + case AMDILISD::ATOM_G_UMIN_NORET: + case AMDILISD::ATOM_G_OR_NORET: + case AMDILISD::ATOM_G_SUB_NORET: + case AMDILISD::ATOM_G_RSUB_NORET: + case AMDILISD::ATOM_G_XCHG_NORET: + case AMDILISD::ATOM_G_XOR_NORET: + case AMDILISD::ATOM_L_ADD: + case AMDILISD::ATOM_L_AND: + case AMDILISD::ATOM_L_MAX: + case AMDILISD::ATOM_L_UMAX: + case AMDILISD::ATOM_L_MIN: + case AMDILISD::ATOM_L_UMIN: + case AMDILISD::ATOM_L_OR: + case AMDILISD::ATOM_L_SUB: + case AMDILISD::ATOM_L_RSUB: + case AMDILISD::ATOM_L_XCHG: + case AMDILISD::ATOM_L_XOR: + case AMDILISD::ATOM_L_ADD_NORET: + case AMDILISD::ATOM_L_AND_NORET: + case AMDILISD::ATOM_L_MAX_NORET: + case AMDILISD::ATOM_L_UMAX_NORET: + case AMDILISD::ATOM_L_MIN_NORET: + case AMDILISD::ATOM_L_UMIN_NORET: + case AMDILISD::ATOM_L_OR_NORET: + case AMDILISD::ATOM_L_SUB_NORET: + case AMDILISD::ATOM_L_RSUB_NORET: + case AMDILISD::ATOM_L_XCHG_NORET: + case AMDILISD::ATOM_L_XOR_NORET: + case AMDILISD::ATOM_R_ADD: + case AMDILISD::ATOM_R_AND: + case AMDILISD::ATOM_R_MAX: + case AMDILISD::ATOM_R_UMAX: + case AMDILISD::ATOM_R_MIN: + case AMDILISD::ATOM_R_UMIN: + case AMDILISD::ATOM_R_OR: + case AMDILISD::ATOM_R_SUB: + case AMDILISD::ATOM_R_RSUB: + case AMDILISD::ATOM_R_XCHG: + case AMDILISD::ATOM_R_XOR: + case AMDILISD::ATOM_R_ADD_NORET: + case AMDILISD::ATOM_R_AND_NORET: + case AMDILISD::ATOM_R_MAX_NORET: + case 
AMDILISD::ATOM_R_UMAX_NORET: + case AMDILISD::ATOM_R_MIN_NORET: + case AMDILISD::ATOM_R_UMIN_NORET: + case AMDILISD::ATOM_R_OR_NORET: + case AMDILISD::ATOM_R_SUB_NORET: + case AMDILISD::ATOM_R_RSUB_NORET: + case AMDILISD::ATOM_R_XCHG_NORET: + case AMDILISD::ATOM_R_XOR_NORET: + case AMDILISD::ATOM_G_CMPXCHG: + case AMDILISD::ATOM_G_CMPXCHG_NORET: + case AMDILISD::ATOM_L_CMPXCHG: + case AMDILISD::ATOM_L_CMPXCHG_NORET: + case AMDILISD::ATOM_R_CMPXCHG: + case AMDILISD::ATOM_R_CMPXCHG_NORET: + break; + case AMDILISD::ATOM_G_DEC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_G_SUB; + } + break; + case AMDILISD::ATOM_G_INC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_G_ADD; + } + break; + case AMDILISD::ATOM_G_DEC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_G_SUB_NORET; + } + break; + case AMDILISD::ATOM_G_INC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_G_ADD_NORET; + } + break; + case AMDILISD::ATOM_L_DEC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_L_SUB; + } + break; + case AMDILISD::ATOM_L_INC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_L_ADD; + } + break; + case AMDILISD::ATOM_L_DEC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_L_SUB_NORET; + } + break; + case AMDILISD::ATOM_L_INC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_L_ADD_NORET; + } + break; + case 
AMDILISD::ATOM_R_DEC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_R_SUB; + } + break; + case AMDILISD::ATOM_R_INC: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_R_ADD; + } + break; + case AMDILISD::ATOM_R_DEC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_R_SUB; + } + break; + case AMDILISD::ATOM_R_INC_NORET: + addOne = true; + if (Subtarget.calVersion() >= CAL_VERSION_SC_136) { + addVal = (uint32_t)-1; + } else { + opc = AMDILISD::ATOM_R_ADD_NORET; + } + break; + } + // The largest we can have is a cmpxchg w/ a return value and an output chain. + // The cmpxchg function has 3 inputs and a single output along with an + // output change and a target constant, giving a total of 6. + SDValue Ops[12]; + unsigned x = 0; + unsigned y = N->getNumOperands(); + for (x = 0; x < y; ++x) { + Ops[x] = N->getOperand(x); + } + if (addOne) { + Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0); + } + Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32); + SDVTList Tys = N->getVTList(); + MemSDNode *MemNode = dyn_cast<MemSDNode>(N); + assert(MemNode && "Atomic should be of MemSDNode type!"); + N = CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x, + MemNode->getMemoryVT(), MemNode->getMemOperand()).getNode(); + return N; +} + +#ifdef DEBUGTMP +#undef INT64_C +#endif +#undef DEBUGTMP diff --git a/lib/Target/AMDIL/AMDILISelLowering.cpp b/lib/Target/AMDIL/AMDILISelLowering.cpp new file mode 100644 index 00000000000..6179d112678 --- /dev/null +++ b/lib/Target/AMDIL/AMDILISelLowering.cpp @@ -0,0 +1,5576 @@ +//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file implements the interfaces that AMDIL uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "AMDILISelLowering.h" +#include "AMDILDevices.h" +#include "AMDILIntrinsicInfo.h" +#include "AMDILSubtarget.h" +#include "AMDILTargetMachine.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; +#define ISDBITCAST ISD::BITCAST +#define MVTGLUE MVT::Glue +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// +#include "AMDILGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation Help Functions Begin +//===----------------------------------------------------------------------===// + static SDValue +getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) +{ + DebugLoc DL = Src.getDebugLoc(); + EVT svt = Src.getValueType().getScalarType(); + EVT dvt = Dst.getValueType().getScalarType(); + if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { + if (dvt.bitsGT(svt)) { + Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); + } else if (svt.bitsLT(svt)) { + Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 
+ DAG.getConstant(1, MVT::i32)); + } + } else if (svt.isInteger() && dvt.isInteger()) { + if (!svt.bitsEq(dvt)) { + Src = DAG.getSExtOrTrunc(Src, DL, dvt); + } else { + Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); + } + } else if (svt.isInteger()) { + unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; + if (!svt.bitsEq(dvt)) { + if (dvt.getSimpleVT().SimpleTy == MVT::f32) { + Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); + } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { + Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); + } else { + assert(0 && "We only support 32 and 64bit fp types"); + } + } + Src = DAG.getNode(opcode, DL, dvt, Src); + } else if (dvt.isInteger()) { + unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; + if (svt.getSimpleVT().SimpleTy == MVT::f32) { + Src = DAG.getNode(opcode, DL, MVT::i32, Src); + } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { + Src = DAG.getNode(opcode, DL, MVT::i64, Src); + } else { + assert(0 && "We only support 32 and 64bit fp types"); + } + Src = DAG.getSExtOrTrunc(Src, DL, dvt); + } + return Src; +} +// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC +// condition. 
+ static AMDILCC::CondCodes +CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) +{ + switch (CC) { + default: + { + errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; + assert(0 && "Unknown condition code!"); + } + case ISD::SETO: + switch(type) { + case MVT::f32: + return AMDILCC::IL_CC_F_O; + case MVT::f64: + return AMDILCC::IL_CC_D_O; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUO: + switch(type) { + case MVT::f32: + return AMDILCC::IL_CC_F_UO; + case MVT::f64: + return AMDILCC::IL_CC_D_UO; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETGT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_GT; + case MVT::f32: + return AMDILCC::IL_CC_F_GT; + case MVT::f64: + return AMDILCC::IL_CC_D_GT; + case MVT::i64: + return AMDILCC::IL_CC_L_GT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETGE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_GE; + case MVT::f32: + return AMDILCC::IL_CC_F_GE; + case MVT::f64: + return AMDILCC::IL_CC_D_GE; + case MVT::i64: + return AMDILCC::IL_CC_L_GE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETLT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_LT; + case MVT::f32: + return AMDILCC::IL_CC_F_LT; + case MVT::f64: + return AMDILCC::IL_CC_D_LT; + case MVT::i64: + return AMDILCC::IL_CC_L_LT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETLE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_LE; + case MVT::f32: 
+ return AMDILCC::IL_CC_F_LE; + case MVT::f64: + return AMDILCC::IL_CC_D_LE; + case MVT::i64: + return AMDILCC::IL_CC_L_LE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETNE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_NE; + case MVT::f32: + return AMDILCC::IL_CC_F_NE; + case MVT::f64: + return AMDILCC::IL_CC_D_NE; + case MVT::i64: + return AMDILCC::IL_CC_L_NE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETEQ: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_EQ; + case MVT::f32: + return AMDILCC::IL_CC_F_EQ; + case MVT::f64: + return AMDILCC::IL_CC_D_EQ; + case MVT::i64: + return AMDILCC::IL_CC_L_EQ; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUGT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_GT; + case MVT::f32: + return AMDILCC::IL_CC_F_UGT; + case MVT::f64: + return AMDILCC::IL_CC_D_UGT; + case MVT::i64: + return AMDILCC::IL_CC_UL_GT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUGE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_GE; + case MVT::f32: + return AMDILCC::IL_CC_F_UGE; + case MVT::f64: + return AMDILCC::IL_CC_D_UGE; + case MVT::i64: + return AMDILCC::IL_CC_UL_GE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETULT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_LT; + case MVT::f32: + return AMDILCC::IL_CC_F_ULT; + case MVT::f64: + return AMDILCC::IL_CC_D_ULT; + 
case MVT::i64: + return AMDILCC::IL_CC_UL_LT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETULE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_LE; + case MVT::f32: + return AMDILCC::IL_CC_F_ULE; + case MVT::f64: + return AMDILCC::IL_CC_D_ULE; + case MVT::i64: + return AMDILCC::IL_CC_UL_LE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUNE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_NE; + case MVT::f32: + return AMDILCC::IL_CC_F_UNE; + case MVT::f64: + return AMDILCC::IL_CC_D_UNE; + case MVT::i64: + return AMDILCC::IL_CC_UL_NE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUEQ: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_EQ; + case MVT::f32: + return AMDILCC::IL_CC_F_UEQ; + case MVT::f64: + return AMDILCC::IL_CC_D_UEQ; + case MVT::i64: + return AMDILCC::IL_CC_UL_EQ; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOGT: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OGT; + case MVT::f64: + return AMDILCC::IL_CC_D_OGT; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOGE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OGE; + case MVT::f64: + return AMDILCC::IL_CC_D_OGE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOLT: + switch (type) { 
+ case MVT::f32: + return AMDILCC::IL_CC_F_OLT; + case MVT::f64: + return AMDILCC::IL_CC_D_OLT; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOLE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OLE; + case MVT::f64: + return AMDILCC::IL_CC_D_OLE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETONE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_ONE; + case MVT::f64: + return AMDILCC::IL_CC_D_ONE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOEQ: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OEQ; + case MVT::f64: + return AMDILCC::IL_CC_D_OEQ; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + }; +} + + static unsigned int +translateToOpcode(uint64_t CCCode, unsigned int regClass) +{ + switch (CCCode) { + case AMDILCC::IL_CC_D_EQ: + case AMDILCC::IL_CC_D_OEQ: + if (regClass == AMDIL::GPRV2F64RegClassID) { + return (unsigned int)AMDIL::DEQ_v2f64; + } else { + return (unsigned int)AMDIL::DEQ; + } + case AMDILCC::IL_CC_D_LE: + case AMDILCC::IL_CC_D_OLE: + case AMDILCC::IL_CC_D_ULE: + case AMDILCC::IL_CC_D_GE: + case AMDILCC::IL_CC_D_OGE: + case AMDILCC::IL_CC_D_UGE: + return (unsigned int)AMDIL::DGE; + case AMDILCC::IL_CC_D_LT: + case AMDILCC::IL_CC_D_OLT: + case AMDILCC::IL_CC_D_ULT: + case AMDILCC::IL_CC_D_GT: + case AMDILCC::IL_CC_D_OGT: + case AMDILCC::IL_CC_D_UGT: + return (unsigned int)AMDIL::DLT; + case 
AMDILCC::IL_CC_D_NE: + case AMDILCC::IL_CC_D_UNE: + return (unsigned int)AMDIL::DNE; + case AMDILCC::IL_CC_F_EQ: + case AMDILCC::IL_CC_F_OEQ: + return (unsigned int)AMDIL::FEQ; + case AMDILCC::IL_CC_F_LE: + case AMDILCC::IL_CC_F_ULE: + case AMDILCC::IL_CC_F_OLE: + case AMDILCC::IL_CC_F_GE: + case AMDILCC::IL_CC_F_UGE: + case AMDILCC::IL_CC_F_OGE: + return (unsigned int)AMDIL::FGE; + case AMDILCC::IL_CC_F_LT: + case AMDILCC::IL_CC_F_OLT: + case AMDILCC::IL_CC_F_ULT: + case AMDILCC::IL_CC_F_GT: + case AMDILCC::IL_CC_F_OGT: + case AMDILCC::IL_CC_F_UGT: + if (regClass == AMDIL::GPRV2F32RegClassID) { + return (unsigned int)AMDIL::FLT_v2f32; + } else if (regClass == AMDIL::GPRV4F32RegClassID) { + return (unsigned int)AMDIL::FLT_v4f32; + } else { + return (unsigned int)AMDIL::FLT; + } + case AMDILCC::IL_CC_F_NE: + case AMDILCC::IL_CC_F_UNE: + return (unsigned int)AMDIL::FNE; + case AMDILCC::IL_CC_I_EQ: + case AMDILCC::IL_CC_U_EQ: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::IEQ; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRV2I8RegClassID + || regClass == AMDIL::GPRV2I16RegClassID) { + return (unsigned int)AMDIL::IEQ_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRV4I8RegClassID + || regClass == AMDIL::GPRV4I16RegClassID) { + return (unsigned int)AMDIL::IEQ_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_L_EQ: + case AMDILCC::IL_CC_UL_EQ: + return (unsigned int)AMDIL::LEQ; + case AMDILCC::IL_CC_I_GE: + case AMDILCC::IL_CC_I_LE: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::IGE; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::IGE_v2i32; + } else 
if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::IGE_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_I_LT: + case AMDILCC::IL_CC_I_GT: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ILT; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ILT_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ILT_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_L_GE: + return (unsigned int)AMDIL::LGE; + case AMDILCC::IL_CC_L_LE: + return (unsigned int)AMDIL::LLE; + case AMDILCC::IL_CC_L_LT: + return (unsigned int)AMDIL::LLT; + case AMDILCC::IL_CC_L_GT: + return (unsigned int)AMDIL::LGT; + case AMDILCC::IL_CC_I_NE: + case AMDILCC::IL_CC_U_NE: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::INE; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::INE_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::INE_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_U_GE: + case AMDILCC::IL_CC_U_LE: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGE; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == 
AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGE_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGE_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_L_NE: + case AMDILCC::IL_CC_UL_NE: + return (unsigned int)AMDIL::LNE; + case AMDILCC::IL_CC_UL_GE: + return (unsigned int)AMDIL::ULGE; + case AMDILCC::IL_CC_UL_LE: + return (unsigned int)AMDIL::ULLE; + case AMDILCC::IL_CC_U_LT: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ULT; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ULT_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::ULT_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_U_GT: + if (regClass == AMDIL::GPRI32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGT; + } else if (regClass == AMDIL::GPRV2I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGT_v2i32; + } else if (regClass == AMDIL::GPRV4I32RegClassID + || regClass == AMDIL::GPRI8RegClassID + || regClass == AMDIL::GPRI16RegClassID) { + return (unsigned int)AMDIL::UGT_v4i32; + } else { + assert(!"Unknown reg class!"); + } + case AMDILCC::IL_CC_UL_LT: + return (unsigned int)AMDIL::ULLT; + case AMDILCC::IL_CC_UL_GT: + return (unsigned int)AMDIL::ULGT; + case AMDILCC::IL_CC_F_UEQ: + case AMDILCC::IL_CC_D_UEQ: + case AMDILCC::IL_CC_F_ONE: + case AMDILCC::IL_CC_D_ONE: + case 
AMDILCC::IL_CC_F_O: + case AMDILCC::IL_CC_F_UO: + case AMDILCC::IL_CC_D_O: + case AMDILCC::IL_CC_D_UO: + // we don't care + return 0; + + } + errs()<<"Opcode: "<<CCCode<<"\n"; + assert(0 && "Unknown opcode retrieved"); + return 0; +} +SDValue +AMDILTargetLowering::LowerMemArgument( + SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + MachineFrameInfo *MFI, + unsigned i) const +{ + // Create the nodes corresponding to a load from this parameter slot. + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && + getTargetMachine().Options.GuaranteedTailCallOpt; + bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); + + // FIXME: For now, all byval parameter objects are marked mutable. This can + // be changed with more analysis. + // In case of tail call optimization mark all arguments mutable. Since they + // could be overwritten by lowering of arguments in case of a tail call. 
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), isImmutable); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + + if (Flags.isByVal()) + return FIN; + return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); +} +//===----------------------------------------------------------------------===// +// TargetLowering Implementation Help Functions End +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Instruction generation functions +//===----------------------------------------------------------------------===// +uint32_t +AMDILTargetLowering::addExtensionInstructions( + uint32_t reg, bool signedShift, + unsigned int simpleVT) const +{ + int shiftSize = 0; + uint32_t LShift, RShift; + switch(simpleVT) + { + default: + return reg; + case AMDIL::GPRI8RegClassID: + shiftSize = 24; + LShift = AMDIL::SHL_i8; + if (signedShift) { + RShift = AMDIL::SHR_i8; + } else { + RShift = AMDIL::USHR_i8; + } + break; + case AMDIL::GPRV2I8RegClassID: + shiftSize = 24; + LShift = AMDIL::SHL_v2i8; + if (signedShift) { + RShift = AMDIL::SHR_v2i8; + } else { + RShift = AMDIL::USHR_v2i8; + } + break; + case AMDIL::GPRV4I8RegClassID: + shiftSize = 24; + LShift = AMDIL::SHL_v4i8; + if (signedShift) { + RShift = AMDIL::SHR_v4i8; + } else { + RShift = AMDIL::USHR_v4i8; + } + break; + case AMDIL::GPRI16RegClassID: + shiftSize = 16; + LShift = AMDIL::SHL_i16; + if (signedShift) { + RShift = AMDIL::SHR_i16; + } else { + RShift = AMDIL::USHR_i16; + } + break; + case AMDIL::GPRV2I16RegClassID: + shiftSize = 16; + LShift = AMDIL::SHL_v2i16; + if (signedShift) { + RShift = AMDIL::SHR_v2i16; + } else { + RShift = AMDIL::USHR_v2i16; + } + break; + case AMDIL::GPRV4I16RegClassID: + shiftSize = 16; + LShift = AMDIL::SHL_v4i16; + if (signedShift) { + RShift = AMDIL::SHR_v4i16; + } else 
{ + RShift = AMDIL::USHR_v4i16; + } + break; + }; + uint32_t LoadReg = genVReg(simpleVT); + uint32_t tmp1 = genVReg(simpleVT); + uint32_t tmp2 = genVReg(simpleVT); + generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize); + generateMachineInst(LShift, tmp1, reg, LoadReg); + generateMachineInst(RShift, tmp2, tmp1, LoadReg); + return tmp2; +} + +MachineOperand +AMDILTargetLowering::convertToReg(MachineOperand op) const +{ + if (op.isReg()) { + return op; + } else if (op.isImm()) { + uint32_t loadReg + = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); + generateMachineInst(AMDIL::LOADCONST_i32, loadReg) + .addImm(op.getImm()); + op.ChangeToRegister(loadReg, false); + } else if (op.isFPImm()) { + uint32_t loadReg + = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); + generateMachineInst(AMDIL::LOADCONST_f32, loadReg) + .addFPImm(op.getFPImm()); + op.ChangeToRegister(loadReg, false); + } else if (op.isMBB()) { + op.ChangeToRegister(0, false); + } else if (op.isFI()) { + op.ChangeToRegister(0, false); + } else if (op.isCPI()) { + op.ChangeToRegister(0, false); + } else if (op.isJTI()) { + op.ChangeToRegister(0, false); + } else if (op.isGlobal()) { + op.ChangeToRegister(0, false); + } else if (op.isSymbol()) { + op.ChangeToRegister(0, false); + }/* else if (op.isMetadata()) { + op.ChangeToRegister(0, false); + }*/ + return op; +} + +void +AMDILTargetLowering::generateCMPInstr( + MachineInstr *MI, + MachineBasicBlock *BB, + const TargetInstrInfo& TII) +const +{ + MachineOperand DST = MI->getOperand(0); + MachineOperand CC = MI->getOperand(1); + MachineOperand LHS = MI->getOperand(2); + MachineOperand RHS = MI->getOperand(3); + int64_t ccCode = CC.getImm(); + unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass; + unsigned int opCode = translateToOpcode(ccCode, simpleVT); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator BBI = MI; + setPrivateData(BB, BBI, &DL, &TII); + if (!LHS.isReg()) { + LHS = convertToReg(LHS); + } + if 
(!RHS.isReg()) { + RHS = convertToReg(RHS); + } + switch (ccCode) { + case AMDILCC::IL_CC_I_EQ: + case AMDILCC::IL_CC_I_NE: + case AMDILCC::IL_CC_I_GE: + case AMDILCC::IL_CC_I_LT: + { + uint32_t lhsreg = addExtensionInstructions( + LHS.getReg(), true, simpleVT); + uint32_t rhsreg = addExtensionInstructions( + RHS.getReg(), true, simpleVT); + generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg); + } + break; + case AMDILCC::IL_CC_U_EQ: + case AMDILCC::IL_CC_U_NE: + case AMDILCC::IL_CC_U_GE: + case AMDILCC::IL_CC_U_LT: + case AMDILCC::IL_CC_D_EQ: + case AMDILCC::IL_CC_F_EQ: + case AMDILCC::IL_CC_F_OEQ: + case AMDILCC::IL_CC_D_OEQ: + case AMDILCC::IL_CC_D_NE: + case AMDILCC::IL_CC_F_NE: + case AMDILCC::IL_CC_F_UNE: + case AMDILCC::IL_CC_D_UNE: + case AMDILCC::IL_CC_D_GE: + case AMDILCC::IL_CC_F_GE: + case AMDILCC::IL_CC_D_OGE: + case AMDILCC::IL_CC_F_OGE: + case AMDILCC::IL_CC_D_LT: + case AMDILCC::IL_CC_F_LT: + case AMDILCC::IL_CC_F_OLT: + case AMDILCC::IL_CC_D_OLT: + generateMachineInst(opCode, DST.getReg(), + LHS.getReg(), RHS.getReg()); + break; + case AMDILCC::IL_CC_I_GT: + case AMDILCC::IL_CC_I_LE: + { + uint32_t lhsreg = addExtensionInstructions( + LHS.getReg(), true, simpleVT); + uint32_t rhsreg = addExtensionInstructions( + RHS.getReg(), true, simpleVT); + generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg); + } + break; + case AMDILCC::IL_CC_U_GT: + case AMDILCC::IL_CC_U_LE: + case AMDILCC::IL_CC_F_GT: + case AMDILCC::IL_CC_D_GT: + case AMDILCC::IL_CC_F_OGT: + case AMDILCC::IL_CC_D_OGT: + case AMDILCC::IL_CC_F_LE: + case AMDILCC::IL_CC_D_LE: + case AMDILCC::IL_CC_D_OLE: + case AMDILCC::IL_CC_F_OLE: + generateMachineInst(opCode, DST.getReg(), + RHS.getReg(), LHS.getReg()); + break; + case AMDILCC::IL_CC_F_UGT: + case AMDILCC::IL_CC_F_ULE: + { + uint32_t VReg[4] = { + genVReg(simpleVT), genVReg(simpleVT), + genVReg(simpleVT), genVReg(simpleVT) + }; + generateMachineInst(opCode, VReg[0], + RHS.getReg(), LHS.getReg()); + 
generateMachineInst(AMDIL::FNE, VReg[1], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[2], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_F_ULT: + case AMDILCC::IL_CC_F_UGE: + { + uint32_t VReg[4] = { + genVReg(simpleVT), genVReg(simpleVT), + genVReg(simpleVT), genVReg(simpleVT) + }; + generateMachineInst(opCode, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[1], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[2], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_D_UGT: + case AMDILCC::IL_CC_D_ULE: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[4] = { + genVReg(regID), genVReg(regID), + genVReg(regID), genVReg(regID) + }; + // The result of a double comparison is a 32bit result + generateMachineInst(opCode, VReg[0], + RHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[1], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[2], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_D_UGE: + case AMDILCC::IL_CC_D_ULT: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[4] = { + genVReg(regID), genVReg(regID), + genVReg(regID), genVReg(regID) + }; + // The result of a double comparison is a 32bit result + generateMachineInst(opCode, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[1], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[2], + LHS.getReg(), LHS.getReg()); + 
generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_F_UEQ: + { + uint32_t VReg[4] = { + genVReg(simpleVT), genVReg(simpleVT), + genVReg(simpleVT), genVReg(simpleVT) + }; + generateMachineInst(AMDIL::FEQ, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[2], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_F_ONE: + { + uint32_t VReg[4] = { + genVReg(simpleVT), genVReg(simpleVT), + genVReg(simpleVT), genVReg(simpleVT) + }; + generateMachineInst(AMDIL::FNE, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FEQ, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::FEQ, VReg[2], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::BINARY_AND_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_AND_f32, + DST.getReg(), VReg[2], VReg[3]); + } + break; + case AMDILCC::IL_CC_D_UEQ: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[4] = { + genVReg(regID), genVReg(regID), + genVReg(regID), genVReg(regID) + }; + // The result of a double comparison is a 32bit result + generateMachineInst(AMDIL::DEQ, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[2], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[2], VReg[3]); + + } + break; + case AMDILCC::IL_CC_D_ONE: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[4] = { + genVReg(regID), genVReg(regID), + genVReg(regID), 
genVReg(regID) + }; + // The result of a double comparison is a 32bit result + generateMachineInst(AMDIL::DNE, VReg[0], + LHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DEQ, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::DEQ, VReg[2], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::BINARY_AND_f32, + VReg[3], VReg[0], VReg[1]); + generateMachineInst(AMDIL::BINARY_AND_f32, + DST.getReg(), VReg[2], VReg[3]); + + } + break; + case AMDILCC::IL_CC_F_O: + { + uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; + generateMachineInst(AMDIL::FEQ, VReg[0], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FEQ, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_AND_f32, + DST.getReg(), VReg[0], VReg[1]); + } + break; + case AMDILCC::IL_CC_D_O: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; + // The result of a double comparison is a 32bit result + generateMachineInst(AMDIL::DEQ, VReg[0], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DEQ, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_AND_f32, + DST.getReg(), VReg[0], VReg[1]); + } + break; + case AMDILCC::IL_CC_F_UO: + { + uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; + generateMachineInst(AMDIL::FNE, VReg[0], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::FNE, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[0], VReg[1]); + } + break; + case AMDILCC::IL_CC_D_UO: + { + uint32_t regID = AMDIL::GPRF64RegClassID; + uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; + // The result of a double comparison is a 32bit result + generateMachineInst(AMDIL::DNE, VReg[0], + RHS.getReg(), RHS.getReg()); + generateMachineInst(AMDIL::DNE, VReg[1], + LHS.getReg(), LHS.getReg()); + generateMachineInst(AMDIL::BINARY_OR_f32, + DST.getReg(), VReg[0], VReg[1]); + } + break; 
+ case AMDILCC::IL_CC_L_LE: + case AMDILCC::IL_CC_L_GE: + case AMDILCC::IL_CC_L_EQ: + case AMDILCC::IL_CC_L_NE: + case AMDILCC::IL_CC_L_LT: + case AMDILCC::IL_CC_L_GT: + case AMDILCC::IL_CC_UL_LE: + case AMDILCC::IL_CC_UL_GE: + case AMDILCC::IL_CC_UL_EQ: + case AMDILCC::IL_CC_UL_NE: + case AMDILCC::IL_CC_UL_LT: + case AMDILCC::IL_CC_UL_GT: + { + const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( + &this->getTargetMachine())->getSubtargetImpl(); + if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) { + generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg()); + } else { + generateLongRelational(MI, opCode); + } + } + break; + case AMDILCC::COND_ERROR: + assert(0 && "Invalid CC code"); + break; + }; +} + +//===----------------------------------------------------------------------===// +// TargetLowering Class Implementation Begins +//===----------------------------------------------------------------------===// + AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM) +: TargetLowering(TM, new TargetLoweringObjectFileELF()) +{ + int types[] = + { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::f32, + (int)MVT::f64, + (int)MVT::i64, + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + (int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + + int IntTypes[] = + { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::i64 + }; + + int FloatTypes[] = + { + (int)MVT::f32, + (int)MVT::f64 + }; + + int VectorTypes[] = + { + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + (int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + size_t numTypes = sizeof(types) / sizeof(*types); + size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); + size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); + size_t numVectorTypes = 
sizeof(VectorTypes) / sizeof(*VectorTypes); + + const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( + &this->getTargetMachine())->getSubtargetImpl(); + // These are the current register classes that are + // supported + + addRegisterClass(MVT::i32, &AMDIL::GPRI32RegClass); + addRegisterClass(MVT::f32, &AMDIL::GPRF32RegClass); + + if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) { + addRegisterClass(MVT::f64, &AMDIL::GPRF64RegClass); + addRegisterClass(MVT::v2f64, &AMDIL::GPRV2F64RegClass); + } + if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) { + addRegisterClass(MVT::i8, &AMDIL::GPRI8RegClass); + addRegisterClass(MVT::v2i8, &AMDIL::GPRV2I8RegClass); + addRegisterClass(MVT::v4i8, &AMDIL::GPRV4I8RegClass); + setOperationAction(ISD::Constant , MVT::i8 , Legal); + } + if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) { + addRegisterClass(MVT::i16, &AMDIL::GPRI16RegClass); + addRegisterClass(MVT::v2i16, &AMDIL::GPRV2I16RegClass); + addRegisterClass(MVT::v4i16, &AMDIL::GPRV4I16RegClass); + setOperationAction(ISD::Constant , MVT::i16 , Legal); + } + addRegisterClass(MVT::v2f32, &AMDIL::GPRV2F32RegClass); + addRegisterClass(MVT::v4f32, &AMDIL::GPRV4F32RegClass); + addRegisterClass(MVT::v2i32, &AMDIL::GPRV2I32RegClass); + addRegisterClass(MVT::v4i32, &AMDIL::GPRV4I32RegClass); + if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) { + addRegisterClass(MVT::i64, &AMDIL::GPRI64RegClass); + addRegisterClass(MVT::v2i64, &AMDIL::GPRV2I64RegClass); + } + + for (unsigned int x = 0; x < numTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; + + //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types + // We cannot sextinreg, expand to shifts + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Expand); + setOperationAction(ISD::OR, VT, Custom); + setOperationAction(ISD::SUBE, VT, Expand); 
+ setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::BRCOND, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + setOperationAction(ISD::BR_JT, VT, Expand); + setOperationAction(ISD::BRIND, VT, Expand); + // TODO: Implement custom UREM/SREM routines + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISDBITCAST, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::SELECT_CC, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + if (VT != MVT::i64 && VT != MVT::v2i64) { + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + } + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + for (unsigned int x = 0; x < numFloatTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; + + // IL does not have these operations for floating point types + setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); + setOperationAction(ISD::FP_ROUND, VT, Custom); + setOperationAction(ISD::SETOLT, VT, Expand); + setOperationAction(ISD::SETOGE, VT, Expand); + setOperationAction(ISD::SETOGT, VT, Expand); + setOperationAction(ISD::SETOLE, VT, Expand); + setOperationAction(ISD::SETULT, VT, Expand); + setOperationAction(ISD::SETUGE, VT, Expand); + 
setOperationAction(ISD::SETUGT, VT, Expand); + setOperationAction(ISD::SETULE, VT, Expand); + } + + for (unsigned int x = 0; x < numIntTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; + + // GPU also does not have divrem function for signed or unsigned + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::FP_ROUND, VT, Expand); + + // GPU does not have [S|U]MUL_LOHI functions as a single instruction + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + + // GPU doesn't have a rotl, rotr, or byteswap instruction + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); + + // GPU doesn't have any counting operators + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + } + + for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) + { + MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + // setOperationAction(ISD::VSETCC, VT, Expand); + setOperationAction(ISD::SETCC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); + + } + setOperationAction(ISD::FP_ROUND, MVT::Other, Expand); + if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) { + if (stm->calVersion() < CAL_VERSION_SC_139 + || stm->device()->getGeneration() == 
AMDILDeviceInfo::HD4XXX) { + setOperationAction(ISD::MUL, MVT::i64, Custom); + } + setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::v2i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::v2i64, Expand); + setOperationAction(ISD::MUL, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SREM, MVT::v2i64, Expand); + setOperationAction(ISD::Constant , MVT::i64 , Legal); + setOperationAction(ISD::UDIV, MVT::v2i64, Expand); + setOperationAction(ISD::SDIV, MVT::v2i64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); + } + if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) { + // we support loading/storing v2f64 but not operations on the type + setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand); + setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Expand); + // We want to expand vector conversions into their scalar + // counterparts. 
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + } + // TODO: Fix the UDIV24 algorithm so it works for these + // types correctly. This needs vector comparisons + // for this to work correctly. + setOperationAction(ISD::UDIV, MVT::v2i8, Expand); + setOperationAction(ISD::UDIV, MVT::v4i8, Expand); + setOperationAction(ISD::UDIV, MVT::v2i16, Expand); + setOperationAction(ISD::UDIV, MVT::v4i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + setOperationAction(ISD::SUBC, MVT::Other, Expand); + setOperationAction(ISD::ADDE, MVT::Other, Expand); + setOperationAction(ISD::ADDC, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + setOperationAction(ISD::BR_CC, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::SETCC, MVT::Other, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + setOperationAction(ISD::FDIV, MVT::f32, Custom); + setOperationAction(ISD::FDIV, MVT::v2f32, Custom); + setOperationAction(ISD::FDIV, MVT::v4f32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); + // Use the default implementation. 
+ setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); + setOperationAction(ISD::Constant , MVT::i32 , Legal); + setOperationAction(ISD::TRAP , MVT::Other , Legal); + + setStackPointerRegisterToSaveRestore(AMDIL::SP); + setSchedulingPreference(Sched::RegPressure); + setPow2DivIsCheap(false); + setPrefLoopAlignment(16); + setSelectIsExpensive(true); + setJumpIsExpensive(true); + computeRegisterProperties(); + + maxStoresPerMemcpy = 4096; + maxStoresPerMemmove = 4096; + maxStoresPerMemset = 4096; + +#undef numTypes +#undef numIntTypes +#undef numVectorTypes +#undef numFloatTypes +} + +const char * +AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const +{ + switch (Opcode) { + default: return 0; + case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY"; + case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP"; + case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP"; + case AMDILISD::BITCONV: return "AMDILISD::BITCONV"; + case AMDILISD::CMOV: return "AMDILISD::CMOV"; + case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; + case AMDILISD::INEGATE: return "AMDILISD::INEGATE"; + case AMDILISD::MAD: return "AMDILISD::MAD"; + case AMDILISD::UMAD: return "AMDILISD::UMAD"; + case AMDILISD::CALL: return "AMDILISD::CALL"; + case AMDILISD::RET: return "AMDILISD::RET"; + case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI"; + case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO"; + case AMDILISD::ADD: return "AMDILISD::ADD"; + case AMDILISD::UMUL: return "AMDILISD::UMUL"; + case AMDILISD::AND: return "AMDILISD::AND"; + case AMDILISD::OR: return "AMDILISD::OR"; + case AMDILISD::NOT: return "AMDILISD::NOT"; + case AMDILISD::XOR: return 
"AMDILISD::XOR"; + case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; + case AMDILISD::SMAX: return "AMDILISD::SMAX"; + case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE"; + case AMDILISD::MOVE: return "AMDILISD::MOVE"; + case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; + case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT"; + case AMDILISD::VINSERT: return "AMDILISD::VINSERT"; + case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT"; + case AMDILISD::LCREATE: return "AMDILISD::LCREATE"; + case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI"; + case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO"; + case AMDILISD::DCREATE: return "AMDILISD::DCREATE"; + case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI"; + case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO"; + case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2"; + case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2"; + case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2"; + case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2"; + case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2"; + case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2"; + case AMDILISD::CMP: return "AMDILISD::CMP"; + case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; + case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; + case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; + case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; + case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; + case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; + case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; + case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; + case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO"; + case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO"; + case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP"; + case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR"; + case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD"; + case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND"; + case 
AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG"; + case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC"; + case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC"; + case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX"; + case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX"; + case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN"; + case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN"; + case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR"; + case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB"; + case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB"; + case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG"; + case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR"; + case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET"; + case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET"; + case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET"; + case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET"; + case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET"; + case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET"; + case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET"; + case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET"; + case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET"; + case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET"; + case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET"; + case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET"; + case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET"; + case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET"; + case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD"; + case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND"; + case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG"; + case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC"; + 
case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC"; + case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX"; + case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX"; + case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN"; + case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN"; + case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR"; + case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB"; + case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB"; + case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG"; + case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR"; + case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET"; + case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET"; + case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET"; + case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET"; + case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET"; + case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET"; + case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET"; + case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET"; + case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET"; + case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET"; + case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET"; + case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET"; + case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET"; + case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD"; + case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND"; + case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG"; + case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC"; + case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC"; + case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX"; + case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX"; + case 
AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN"; + case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN"; + case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR"; + case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR"; + case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB"; + case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB"; + case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG"; + case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR"; + case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET"; + case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET"; + case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET"; + case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET"; + case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET"; + case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET"; + case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET"; + case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET"; + case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET"; + case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET"; + case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET"; + case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET"; + case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET"; + case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET"; + case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET"; + case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC"; + case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET"; + case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME"; + case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET"; + case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ"; + case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE"; + 
case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0"; + case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1"; + case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ"; + case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE"; + case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0"; + case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1"; + + }; +} +bool +AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const +{ + if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic + || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) { + return false; + } + bool bitCastToInt = false; + unsigned IntNo; + bool isRet = true; + const AMDILSubtarget *STM = &this->getTargetMachine() + .getSubtarget<AMDILSubtarget>(); + switch (Intrinsic) { + default: return false; // Don't custom lower most intrinsics. + case AMDGPUIntrinsic::AMDIL_atomic_add_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_add_gu32: + IntNo = AMDILISD::ATOM_G_ADD; break; + case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_ADD_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_add_lu32: + case AMDGPUIntrinsic::AMDIL_atomic_add_li32: + IntNo = AMDILISD::ATOM_L_ADD; break; + case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_ADD_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_add_ru32: + case AMDGPUIntrinsic::AMDIL_atomic_add_ri32: + IntNo = AMDILISD::ATOM_R_ADD; break; + case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_ADD_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_and_gu32: + IntNo = AMDILISD::ATOM_G_AND; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret: + 
case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_AND_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_li32: + case AMDGPUIntrinsic::AMDIL_atomic_and_lu32: + IntNo = AMDILISD::ATOM_L_AND; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_AND_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_and_ru32: + IntNo = AMDILISD::ATOM_R_AND; break; + case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_AND_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32: + IntNo = AMDILISD::ATOM_G_CMPXCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32: + IntNo = AMDILISD::ATOM_L_CMPXCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32: + IntNo = AMDILISD::ATOM_R_CMPXCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_G_DEC; + } else { + IntNo = AMDILISD::ATOM_G_SUB; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret: + case 
AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_G_DEC_NORET; + } else { + IntNo = AMDILISD::ATOM_G_SUB_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_li32: + case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_L_DEC; + } else { + IntNo = AMDILISD::ATOM_L_SUB; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_L_DEC_NORET; + } else { + IntNo = AMDILISD::ATOM_L_SUB_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_R_DEC; + } else { + IntNo = AMDILISD::ATOM_R_SUB; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_R_DEC_NORET; + } else { + IntNo = AMDILISD::ATOM_R_SUB_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_G_INC; + } else { + IntNo = AMDILISD::ATOM_G_ADD; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_G_INC_NORET; + } else { + IntNo = AMDILISD::ATOM_G_ADD_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_inc_li32: + case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_L_INC; + } else { + IntNo = AMDILISD::ATOM_L_ADD; + } + break; + case 
AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_L_INC_NORET; + } else { + IntNo = AMDILISD::ATOM_L_ADD_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32: + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_R_INC; + } else { + IntNo = AMDILISD::ATOM_R_ADD; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret: + isRet = false; + if (STM->calVersion() >= CAL_VERSION_SC_136) { + IntNo = AMDILISD::ATOM_R_INC_NORET; + } else { + IntNo = AMDILISD::ATOM_R_ADD_NORET; + } + break; + case AMDGPUIntrinsic::AMDIL_atomic_max_gi32: + IntNo = AMDILISD::ATOM_G_MAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_gu32: + IntNo = AMDILISD::ATOM_G_UMAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_MAX_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_UMAX_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_li32: + IntNo = AMDILISD::ATOM_L_MAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_lu32: + IntNo = AMDILISD::ATOM_L_UMAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_MAX_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_UMAX_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_ri32: + IntNo = AMDILISD::ATOM_R_MAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_ru32: + IntNo = AMDILISD::ATOM_R_UMAX; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_MAX_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_UMAX_NORET; break; + case 
AMDGPUIntrinsic::AMDIL_atomic_min_gi32: + IntNo = AMDILISD::ATOM_G_MIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_gu32: + IntNo = AMDILISD::ATOM_G_UMIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_MIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_UMIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_li32: + IntNo = AMDILISD::ATOM_L_MIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_lu32: + IntNo = AMDILISD::ATOM_L_UMIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_MIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_UMIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_ri32: + IntNo = AMDILISD::ATOM_R_MIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_ru32: + IntNo = AMDILISD::ATOM_R_UMIN; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_MIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_UMIN_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_or_gu32: + IntNo = AMDILISD::ATOM_G_OR; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_OR_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_li32: + case AMDGPUIntrinsic::AMDIL_atomic_or_lu32: + IntNo = AMDILISD::ATOM_L_OR; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_OR_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_or_ru32: + IntNo = AMDILISD::ATOM_R_OR; break; + case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret: + case 
AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_OR_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32: + IntNo = AMDILISD::ATOM_G_SUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_SUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_li32: + case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32: + IntNo = AMDILISD::ATOM_L_SUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_SUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32: + IntNo = AMDILISD::ATOM_R_SUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_SUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32: + IntNo = AMDILISD::ATOM_G_RSUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_RSUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32: + IntNo = AMDILISD::ATOM_L_RSUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_RSUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32: + IntNo = AMDILISD::ATOM_R_RSUB; break; + case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_RSUB_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32: + bitCastToInt = 
true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32: + IntNo = AMDILISD::ATOM_G_XCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret: + bitCastToInt = true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_XCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32: + bitCastToInt = true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32: + IntNo = AMDILISD::ATOM_L_XCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret: + bitCastToInt = true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_XCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32: + bitCastToInt = true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32: + IntNo = AMDILISD::ATOM_R_XCHG; break; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret: + bitCastToInt = true; + case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_XCHG_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32: + case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32: + IntNo = AMDILISD::ATOM_G_XOR; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_G_XOR_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_li32: + case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32: + IntNo = AMDILISD::ATOM_L_XOR; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_L_XOR_NORET; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32: + case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32: + 
IntNo = AMDILISD::ATOM_R_XOR; break; + case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret: + case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret: + isRet = false; + IntNo = AMDILISD::ATOM_R_XOR_NORET; break; + case AMDGPUIntrinsic::AMDIL_append_alloc_i32: + IntNo = AMDILISD::APPEND_ALLOC; break; + case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret: + isRet = false; + IntNo = AMDILISD::APPEND_ALLOC_NORET; break; + case AMDGPUIntrinsic::AMDIL_append_consume_i32: + IntNo = AMDILISD::APPEND_CONSUME; break; + case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret: + isRet = false; + IntNo = AMDILISD::APPEND_CONSUME_NORET; break; + }; + + Info.opc = IntNo; + Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32; + Info.ptrVal = I.getOperand(0); + Info.offset = 0; + Info.align = 4; + Info.vol = true; + Info.readMem = isRet; + Info.writeMem = true; + return true; +} +// The backend supports 32 and 64 bit floating point immediates +bool +AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const +{ + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return true; + } else { + return false; + } +} + +bool +AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const +{ + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return false; + } else { + return true; + } +} + + +// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to +// be zero. Op is expected to be a target specific node. Used by DAG +// combiner. 

// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one, for AMDIL-specific target nodes.  Only AMDILISD::SELECT_CC
// is handled: the known-bit sets of the two queried operands are intersected,
// since the select can only guarantee bits that agree in both.  Every other
// target opcode reports "nothing known".
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case AMDILISD::SELECT_CC:
      // Query operands 1 and 0 — presumably the two candidate values of the
      // select; confirm against the AMDILISD::SELECT_CC operand layout.
      DAG.ComputeMaskedBits(
          Op.getOperand(1),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      // NOTE(review): this second query does not pass Depth + 1, so it
      // restarts at the default depth — verify this is intentional.
      DAG.ComputeMaskedBits(
          Op.getOperand(0),
          KnownZero2,
          KnownOne2
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  };
}

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention (CC_AMDIL32),
// so it is returned unconditionally; Op is ignored.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. This returns an SDNode with the same number of values as the
// ISD::CALL.
// Copy each register-located call result out of its physical register into
// the InVals list, threading the chain and glue through successive copies.
// Results that are not register-located are silently skipped here.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // getCopyFromReg produces (value, chain, glue); result 1 becomes the
      // new chain, result 0 is the copied value, and result 2 is the glue
      // that links the next copy to this one.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Expand custom-inserted pseudo instructions.  Only the CMP family is
// handled: ExpandCaseToAllTypes presumably expands to `case` labels for every
// typed variant of AMDIL::CMP (macro defined elsewhere in the backend), and
// each is replaced by a generated compare sequence before being erased.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    MI->eraseFromParent();
    break;
    default:
      break;
  }
  return BB;
}

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
+ static const SDValue +Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) +{ + if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { + DAG.AssignOrdering( New.getNode(), order ); + for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) + Ordered( DAG, order, New.getOperand(i) ); + } + return New; +} + +#define LOWER(A) \ + case ISD:: A: \ +return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) + +SDValue +AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const +{ + switch (Op.getOpcode()) { + default: + Op.getNode()->dump(); + assert(0 && "Custom lowering code for this" + "instruction is not implemented yet!"); + break; + LOWER(GlobalAddress); + LOWER(JumpTable); + LOWER(ConstantPool); + LOWER(ExternalSymbol); + LOWER(FP_TO_SINT); + LOWER(FP_TO_UINT); + LOWER(SINT_TO_FP); + LOWER(UINT_TO_FP); + LOWER(ADD); + LOWER(MUL); + LOWER(SUB); + LOWER(FDIV); + LOWER(SDIV); + LOWER(SREM); + LOWER(UDIV); + LOWER(UREM); + LOWER(BUILD_VECTOR); + LOWER(INSERT_VECTOR_ELT); + LOWER(EXTRACT_VECTOR_ELT); + LOWER(EXTRACT_SUBVECTOR); + LOWER(SCALAR_TO_VECTOR); + LOWER(CONCAT_VECTORS); + LOWER(AND); + LOWER(OR); + LOWER(SELECT); + LOWER(SELECT_CC); + LOWER(SETCC); + LOWER(SIGN_EXTEND_INREG); + LOWER(BITCAST); + LOWER(DYNAMIC_STACKALLOC); + LOWER(BRCOND); + LOWER(BR_CC); + LOWER(FP_ROUND); + } + return Op; +} + +int +AMDILTargetLowering::getVarArgsFrameOffset() const +{ + return VarArgsFrameOffset; +} +#undef LOWER + +SDValue +AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const +{ + SDValue DST = Op; + const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *G = GADN->getGlobal(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); + if (!GV) { + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } else { + if (GV->hasInitializer()) { + const Constant *C = dyn_cast<Constant>(GV->getInitializer()); + if (const ConstantInt *CI = 
dyn_cast<ConstantInt>(C)) { + DST = DAG.getConstant(CI->getValue(), Op.getValueType()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { + DST = DAG.getConstantFP(CF->getValueAPF(), + Op.getValueType()); + } else if (dyn_cast<ConstantAggregateZero>(C)) { + EVT VT = Op.getValueType(); + if (VT.isInteger()) { + DST = DAG.getConstant(0, VT); + } else { + DST = DAG.getConstantFP(0, VT); + } + } else { + assert(!"lowering this type of Global Address " + "not implemented yet!"); + C->dump(); + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } + } else { + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const +{ + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + return Result; +} +SDValue +AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const +{ + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + EVT PtrVT = Op.getValueType(); + SDValue Result; + if (CP->isMachineConstantPoolEntry()) { + Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); + } else { + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); + } + return Result; +} + +SDValue +AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const +{ + const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); + SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); + return Result; +} +/// LowerFORMAL_ARGUMENTS - transform physical registers into +/// virtual registers and generate load operations for +/// arguments places on the stack. 
/// Lower incoming formal arguments: register-passed arguments become
/// live-in copies (with assert-ext/truncate fixups for promoted types);
/// stack-passed arguments are delegated to LowerMemArgument.
/// TODO: isVarArg, hasStructRet, isMemReg
 SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register-passed argument: mark the physreg live-in and copy it
      // into a fresh virtual register of the matching class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.
      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-passed argument: emit the load via LowerMemArgument.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
+static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { + assert(0 && "MemCopy does not exist yet"); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + + return DAG.getMemcpy(Chain, + Src.getDebugLoc(), + Dst, Src, SizeNode, Flags.getByValAlign(), + /*IsVol=*/false, /*AlwaysInline=*/true, + MachinePointerInfo(), MachinePointerInfo()); +} + +SDValue +AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain, + SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const +{ + unsigned int LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, + dl, + getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG); + } else { + PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(LocMemOffset), + false, false, 0); + } + return PtrOff; +} +/// LowerCAL - functions arguments are copied from virtual +/// regs to (physical regs)/(stack frame), CALLSEQ_START and +/// CALLSEQ_END are emitted. +/// TODO: isVarArg, isTailCall, hasStructRet +SDValue +AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, + bool& isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) +const +{ + isTailCall = false; + MachineFunction& MF = DAG.getMachineFunction(); + // FIXME: DO we need to handle fast calling conventions and tail call + // optimizations?? X86/PPC ISelLowering + /*bool hasStructRet = (TheCall->getNumArgs()) + ? 
TheCall->getArgFlags(0).device()->isSRet() + : false;*/ + + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Analyze operands of the call, assigning locations to each operand + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + // Analyize the calling operands, but need to change + // if we have more than one calling convetion + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + unsigned int NumBytes = CCInfo.getNextStackOffset(); + if (isTailCall) { + assert(isTailCall && "Tail Call not handled yet!"); + // See X86/PPC ISelLowering + } + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + SDValue StackPtr; + //unsigned int FirstStacArgLoc = 0; + //int LastArgStackLoc = 0; + + // Walk the register/memloc assignments, insert copies/loads + for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers + // Arguments start after the 5 first operands of ISD::CALL + SDValue Arg = OutVals[i]; + //Promote the value if needed + switch(VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, + dl, + VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, + dl, + VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, + dl, + VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (VA.isMemLoc()) { + // Create the frame index object for this incoming parameter + int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + SDValue PtrOff = 
DAG.getFrameIndex(FI,getPointerTy()); + + // emit ISD::STORE whichs stores the + // parameter value to a stack Location + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } else { + assert(0 && "Not a Reg/Mem Loc, major error!"); + } + } + if (!MemOpChains.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, + dl, + MVT::Other, + &MemOpChains[0], + MemOpChains.size()); + } + SDValue InFlag; + if (!isTailCall) { + for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, + dl, + RegsToPass[i].first, + RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress/ + // TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); + } + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + } + else if (isTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1708 + } + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE); + SmallVector<SDValue, 8> Ops; + + if (isTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1721 + } + // If this is a direct call, pass the chain and the callee + if (Callee.getNode()) { + Ops.push_back(Chain); + Ops.push_back(Callee); + } + + if (isTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1739 + } + + // Add argument registers to the end of the list so that they are known + // live into the call + for (unsigned int i = 0, e = RegsToPass.size(); i != e; 
++i) { + Ops.push_back(DAG.getRegister( + RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + if (InFlag.getNode()) { + Ops.push_back(InFlag); + } + + // Emit Tail Call + if (isTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1762 + } + + Chain = DAG.getNode(AMDILISD::CALL, + dl, + NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node + Chain = DAG.getCALLSEQ_END( + Chain, + DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), + InFlag); + InFlag = Chain.getValue(1); + // Handle result values, copying them out of physregs into vregs that + // we return + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals); +} +static void checkMADType( + SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD) +{ + bool globalLoadStore = false; + is24bitMAD = false; + is32bitMAD = false; + return; + assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for " + "this to work correctly!"); + if (Op.getNode()->use_empty()) { + return; + } + for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(), + nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) { + SDNode *ptr = *nBegin; + const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr); + // If we are not a LSBaseSDNode then we don't do this + // optimization. + // If we are a LSBaseSDNode, but the op is not the offset + // or base pointer, then we don't do this optimization + // (i.e. 
we are the value being stored) + if (!lsNode || + (lsNode->writeMem() && lsNode->getOperand(1) == Op)) { + return; + } + const PointerType *PT = + dyn_cast<PointerType>(lsNode->getSrcValue()->getType()); + unsigned as = PT->getAddressSpace(); + switch(as) { + default: + globalLoadStore = true; + case AMDILAS::PRIVATE_ADDRESS: + if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) { + globalLoadStore = true; + } + break; + case AMDILAS::CONSTANT_ADDRESS: + if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) { + globalLoadStore = true; + } + break; + case AMDILAS::LOCAL_ADDRESS: + if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) { + globalLoadStore = true; + } + break; + case AMDILAS::REGION_ADDRESS: + if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) { + globalLoadStore = true; + } + break; + } + } + if (globalLoadStore) { + is32bitMAD = true; + } else { + is24bitMAD = true; + } +} + +SDValue +AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const +{ + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue DST; + const AMDILSubtarget *stm = &this->getTargetMachine() + .getSubtarget<AMDILSubtarget>(); + bool isVec = OVT.isVector(); + if (OVT.getScalarType() == MVT::i64) { + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i64) { + INTTY = MVT::v2i32; + } + if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps) + && INTTY == MVT::i32) { + DST = DAG.getNode(AMDILISD::ADD, + DL, + OVT, + LHS, RHS); + } else { + SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; + // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 + LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); + RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); + LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); + RHSHI = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS); + INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO); + INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI); + SDValue cmp; + cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), + INTLO, RHSLO); + cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp); + INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp); + DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, + INTLO, INTHI); + } + } else { + if (LHS.getOpcode() == ISD::FrameIndex || + RHS.getOpcode() == ISD::FrameIndex) { + DST = DAG.getNode(AMDILISD::ADDADDR, + DL, + OVT, + LHS, RHS); + } else { + if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem) + && LHS.getNumOperands() + && RHS.getNumOperands()) { + bool is24bitMAD = false; + bool is32bitMAD = false; + const ConstantSDNode *LHSConstOpCode = + dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1)); + const ConstantSDNode *RHSConstOpCode = + dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1)); + if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode) + || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) + || LHS.getOpcode() == ISD::MUL + || RHS.getOpcode() == ISD::MUL) { + SDValue Op1, Op2, Op3; + // FIXME: Fix this so that it works for unsigned 24bit ops. 
+ if (LHS.getOpcode() == ISD::MUL) { + Op1 = LHS.getOperand(0); + Op2 = LHS.getOperand(1); + Op3 = RHS; + } else if (RHS.getOpcode() == ISD::MUL) { + Op1 = RHS.getOperand(0); + Op2 = RHS.getOperand(1); + Op3 = LHS; + } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) { + Op1 = LHS.getOperand(0); + Op2 = DAG.getConstant( + 1 << LHSConstOpCode->getZExtValue(), MVT::i32); + Op3 = RHS; + } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) { + Op1 = RHS.getOperand(0); + Op2 = DAG.getConstant( + 1 << RHSConstOpCode->getZExtValue(), MVT::i32); + Op3 = LHS; + } + checkMADType(Op, stm, is24bitMAD, is32bitMAD); + // We can possibly do a MAD transform! + if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) { + uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32; + SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/); + DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN, + DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32), + Op1, Op2, Op3); + } else if(is32bitMAD) { + SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/); + DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN, + DL, Tys, DAG.getEntryNode(), + DAG.getConstant( + AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32), + Op1, Op2, Op3); + } + } + } + DST = DAG.getNode(AMDILISD::ADD, + DL, + OVT, + LHS, RHS); + } + } + return DST; +} +SDValue +AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG, + uint32_t bits) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT INTTY = Op.getValueType(); + EVT FPTY; + if (INTTY.isVector()) { + FPTY = EVT(MVT::getVectorVT(MVT::f32, + INTTY.getVectorNumElements())); + } else { + FPTY = EVT(MVT::f32); + } + /* static inline uint + __clz_Nbit(uint x) + { + int xor = 0x3f800000U | x; + float tp = as_float(xor); + float t = tp + -1.0f; + uint tint = as_uint(t); + int cmp = (x != 0); + uint tsrc = tint >> 23; + uint tmask = tsrc & 0xffU; + uint cst = (103 + N)U - tmask; + return cmp ? 
// Count leading zeros of the low `bits` bits of a 32-bit (or vector-of-i32)
// value without a native CLZ instruction.  Trick: OR the value into the
// mantissa of 1.0f, subtract 1.0f, and recover the count from the resulting
// biased exponent; the final select handles x == 0, which must yield `bits`.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  EVT FPTY;
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
          INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
       int xor = 0x3f800000U | x;
       float tp = as_float(xor);
       float t = tp + -1.0f;
       uint tint = as_uint(t);
       int cmp = (x != 0);
       uint tsrc = tint >> 23;
       uint tmask = tsrc & 0xffU;
       uint cst = (103 + N)U - tmask;
       return cmp ? cst : N;
     }
   */
  // NOTE(review): the assert message says "genCLZu16" but this function is
  // genCLZuN; the check itself (32-bit scalar element type) is what matters.
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZu16 only works on 32bit types");
  // uint x = Op
  SDValue x = Op;
  // xornode = 0x3f800000 | x  (OR x into the mantissa of 1.0f)
  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
      DAG.getConstant(0x3f800000, INTTY), x);
  // float tp = as_float(xornode)
  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
  // float t = tp + -1.0f
  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
      DAG.getConstantFP(-1.0f, FPTY));
  // uint tint = as_uint(t)
  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
  // int cmp = (x != 0)
  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
      DAG.getConstant(0, INTTY));
  // uint tsrc = tint >> 23  (extract the biased exponent)
  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
      DAG.getConstant(23, INTTY));
  // uint tmask = tsrc & 0xFF
  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
      DAG.getConstant(0xFFU, INTTY));
  // uint cst = (103 + bits) - tmask
  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
      DAG.getConstant((103U + bits), INTTY), tmask);
  // return cmp ? cst : bits
  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
      DAG.getConstant(bits, INTTY));
  return cst;
}
// Count leading zeros of a full 32-bit value.
//  - HD5XXX+ (Evergreen and later): use the hardware ffb_hi ("find first
//    bit, high") instruction, mapping its not-found result (< 0) to 32.
//  - HD4XXX: compose from two 16-bit software CLZs (genCLZuN).
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    //__clz_32bit(uint u)
    //{
    // int z = __amdil_ffb_hi(u) ;
    // return z < 0 ? 32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0  (ffb_hi reports "no bit found" as a negative value)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // static inline uint
    //__clz_32bit(uint x)
    //{
    // uint zh = __clz_16bit(x >> 16);
    // uint zl = __clz_16bit(x & 0xffffU);
    // return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U  (high half all zero -> count the low half too)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Count leading zeros of a 64-bit (or vector-of-i64) value.
//  - HD5XXX+ (Evergreen): combine two 32-bit CLZs on the halves.
//  - HD4XXX: combine three 23-bit software CLZs (the f32-mantissa trick in
//    genCLZuN only covers 23 bits per step).
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = (uint)(x & 0xFFFFFFFF)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = (uint)(x >> 32)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32 : zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46  (top 18 bits)
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF  (middle 23 bits)
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF  (low 23 bits)
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (implemented as zh + (unsigned)-5; the top group
    // only spans 18 bits, so the 23-bit count is 5 too large)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  [comment previously said "zh = 18"]
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm  [comment previously said "zhm5 + zh"]
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = (zhm5zm == 41)
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18  [comment previously said "zhm5 + 18"]
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5  [comment previously said ": zh"]
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Software conversion of f64 (scalar or vector) to i64.  When includeSign is
// true this performs a signed conversion by converting fabs(d) and fixing up
// the sign afterwards; otherwise the conversion is unsigned.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the IEEE-754 *float bit
    // patterns* of 0x1.0p-32 and -0x1.0p+32, but getConstantFP takes a
    // value, so as written these build the doubles 796917760.0 and
    // -3481272320.0.  Presumably the backend matches these literals later;
    // confirm, or pass the intended values directly.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Select between l and -l depending on whether fabs changed the input.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp
       mlo = xlo << 11;

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 63 - e;
       srge64 = sr >= 64;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       rhi0 = mhi >> (sr &31);
       rlo0 = mlo >> (sr &31);
       temp = mhi << (32 - sr);
       temp |= rlo0;
       rlo0 = sr ? temp : rlo0;

       // Compute result for 32 <= sr
       rhi1 = 0;
       rlo1 = srge64 ? 0 : rhi0;

       // Pick between the 2 results
       rhi = srge32 ? rhi1 : rhi0;
       rlo = srge32 ? rlo1 : rlo0;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       rhi = srlt0 ? MAXVALUE : rhi;
       rlo = srlt0 ? MAXVALUE : rlo;

       // Create long
       res = LCREATE( rlo, rhi );

       // Deal with sign bit (ignoring whether result is signed or unsigned value)
       if (includeSign) {
       sign = ((signed int) xhi) >> 31; fill with sign bit
       sign = LCREATE( sign, sign );
       res += sign;
       res ^= sign;
       }

       return res;
       }
     */
    // NOTE(review): the "Optional saturate on overflow" step from the
    // pseudocode above is NOT implemented below -- out-of-range inputs are
    // not clamped.  Confirm whether that is intentional.
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (OR in the hidden bit, shift to the top)
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: (res + sign) ^ sign negates res when sign == -1.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// Software conversion of f64 (scalar or vector) to i32, by extracting the
// mantissa and shifting it according to the exponent.  When includeSign is
// true the sign of the input is applied to the result.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
     // Convert d in to 32-bit components
     long x = as_long(d);
     xhi = LCOMPHI(x);
     xlo = LCOMPLO(x);

     // Generate 'normalized' mantissa
     mhi = xhi | 0x00100000; // hidden bit
     mhi <<= 11;
     temp = xlo >> (32 - 11);
     mhi |= temp

     // Compute shift right count from exponent
     e = (xhi >> (52-32)) & 0x7ff;
     sr = 1023 + 31 - e;
     srge32 = sr >= 32;

     // Compute result for 0 <= sr < 32
     res = mhi >> (sr &31);
     res = srge32 ? 0 : res;

     // Optional saturate on overflow
     srlt0 = sr < 0;
     res = srlt0 ? MAXVALUE : res;

     // Deal with sign bit (ignoring whether result is signed or unsigned value)
     if (includeSign) {
     sign = ((signed int) xhi) >> 31; fill with sign bit
     res += sign;
     res ^= sign;
     }

     return res;
     }
   */
  // NOTE(review): the "Optional saturate on overflow" step from the
  // pseudocode above is NOT implemented below.  Confirm intent.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa (hidden bit ORed in, shifted to the top)
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values needing sr >= 32 collapse to 0
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: (res + sign) ^ sign negates res when sign == -1.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); + SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); + + // Generate 'normalized' mantissa + SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, + xhi, DAG.getConstant( 0x00100000, INTVT ) ); + mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); + SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, + xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); + mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); + + // Compute shift right count from exponent + SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, + xhi, DAG.getConstant( 52-32, INTVT ) ); + e = DAG.getNode( ISD::AND, DL, INTVT, + e, DAG.getConstant( 0x7ff, INTVT ) ); + SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, + DAG.getConstant( 1023 + 31, INTVT ), e ); + SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, + DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), + sr, DAG.getConstant(32, INTVT)); + + // Compute result for 0 <= sr < 32 + SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); + res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, + srge32, DAG.getConstant(0,INTVT), res ); + + // Deal with sign bit + if (includeSign) { + SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, + xhi, DAG.getConstant( 31, INTVT ) ); + res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign ); + res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign ); + } + return res; +} +SDValue +AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue RHS = Op.getOperand(0); + EVT RHSVT = RHS.getValueType(); + MVT RST = RHSVT.getScalarType().getSimpleVT(); + EVT LHSVT = Op.getValueType(); + MVT LST = LHSVT.getScalarType().getSimpleVT(); + DebugLoc DL = Op.getDebugLoc(); + SDValue DST; + const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl()); + if (RST == MVT::f64 && 
RHSVT.isVector() + && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + // We dont support vector 64bit floating point convertions. + for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { + SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); + op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); + if (!x) { + DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); + } else { + DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, + DST, op, DAG.getTargetConstant(x, MVT::i32)); + } + } + } else { + if (RST == MVT::f64 + && LST == MVT::i32) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = SDValue(Op.getNode(), 0); + } else { + DST = genf64toi32(RHS, DAG, true); + } + } else if (RST == MVT::f64 + && LST == MVT::i64) { + DST = genf64toi64(RHS, DAG, true); + } else if (RST == MVT::f64 + && (LST == MVT::i8 || LST == MVT::i16)) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); + } else { + SDValue ToInt = genf64toi32(RHS, DAG, true); + DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); + } + + } else { + DST = SDValue(Op.getNode(), 0); + } + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue DST; + SDValue RHS = Op.getOperand(0); + EVT RHSVT = RHS.getValueType(); + MVT RST = RHSVT.getScalarType().getSimpleVT(); + EVT LHSVT = Op.getValueType(); + MVT LST = LHSVT.getScalarType().getSimpleVT(); + DebugLoc DL = Op.getDebugLoc(); + const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl()); + if (RST == MVT::f64 && RHSVT.isVector() + && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + // We dont support vector 64bit floating point convertions. 
+ for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { + SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); + op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); + if (!x) { + DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); + } else { + DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, + DST, op, DAG.getTargetConstant(x, MVT::i32)); + } + + } + } else { + if (RST == MVT::f64 + && LST == MVT::i32) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = SDValue(Op.getNode(), 0); + } else { + DST = genf64toi32(RHS, DAG, false); + } + } else if (RST == MVT::f64 + && LST == MVT::i64) { + DST = genf64toi64(RHS, DAG, false); + } else if (RST == MVT::f64 + && (LST == MVT::i8 || LST == MVT::i16)) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); + } else { + SDValue ToInt = genf64toi32(RHS, DAG, false); + DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); + } + + } else { + DST = SDValue(Op.getNode(), 0); + } + } + return DST; +} +SDValue +AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, + SelectionDAG &DAG) const +{ + EVT RHSVT = RHS.getValueType(); + DebugLoc DL = RHS.getDebugLoc(); + EVT INTVT; + EVT LONGVT; + bool isVec = RHSVT.isVector(); + if (isVec) { + LONGVT = EVT(MVT::getVectorVT(MVT::i64, + RHSVT.getVectorNumElements())); + INTVT = EVT(MVT::getVectorVT(MVT::i32, + RHSVT.getVectorNumElements())); + } else { + LONGVT = EVT(MVT::i64); + INTVT = EVT(MVT::i32); + } + SDValue x = RHS; + const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl()); + if (stm->calVersion() >= CAL_VERSION_SC_135) { + // unsigned x = RHS; + // ulong xd = (ulong)(0x4330_0000 << 32) | x; + // double d = as_double( xd ); + // return d - 0x1.0p+52; // 0x1.0p+52 
// Software conversion of u32 (scalar or vector) to f64.
//  - On CAL >= SC_135: splice the u32 into the mantissa of 2^52 and subtract
//    2^52 (the classic exact int-to-double trick).
//  - Otherwise: build the double by hand from a CLZ-derived exponent and a
//    normalized mantissa.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac: shift the leading set bit up to bit 31
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit (implicit leading 1 of the mantissa)
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac: 20 mantissa bits live in the high word, the
    // remaining 11 spill into the top of the low word.
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
// Software conversion of u64 (scalar or vector) to f64.
//  - Post-HD6XXX: convert each 32-bit half and combine with a mad.
//  - CAL >= SC_135: splice the halves into the mantissas of 2^52 / 2^84 and
//    recombine (0x43300000 and 0x45300000 are those exponent patterns;
//    0x4530000000100000 is the bit pattern of 2^84 + 2^52).
//  - Otherwise: fully manual construction with CLZ, normalization, packing
//    and round-to-nearest-even.
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    INTVT = EVT(MVT::i32);
  }
  LONGVT = RHSVT;
  SDValue x = RHS;
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // double dhi = (double)(as_uint2(x).y);
    // double dlo = (double)(as_uint2(x).x);
    // return mad(dhi, 0x1.0p+32, dlo)
    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
    // NOTE(review): 0x4f800000 is the *float bit pattern* of 0x1.0p+32, but
    // getConstantFP takes a value, so as written this builds the double
    // 1333788672.0.  Presumably matched as a literal later -- confirm.
    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );

  } else {
    SDValue clz = genCLZu64(x, DAG);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+63), INTVT), clz );
    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        mash, exp, mash ); // exp = exp, or 0 if input was 0

    // Normalize frac: shift by clz mod 32; the clz >= 32 case is handled
    // separately below with rhi2/rlo2.
    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 31, INTVT ) );
    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 32, INTVT ), clz31 );
    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
    // Guard the clz31 == 0 case: the srl by 32 above is undefined, so fold
    // t2 to t1 (making the OR below a no-op) when no shift is needed.
    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rlo2 = DAG.getConstant( 0, INTVT );
    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 32, INTVT ) );
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rhi2, rhi1 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rlo2, rlo1 );

    // Eliminate hidden bit (implicit leading 1 of the mantissa)
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Save bits needed to round properly
    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 0x7ff, INTVT ) );

    // Pack exponent and frac
    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
        rlo, DAG.getConstant( 11, INTVT ) );
    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Compute rounding bit (round-to-nearest-even on the 11 dropped bits)
    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 1, INTVT ) );
    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
        round, DAG.getConstant( 0x3ff, INTVT ) );
    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
    round = DAG.getNode( ISD::SRL, DL, INTVT,
        round, DAG.getConstant( 10, INTVT ) );
    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1

    // Add rounding bit
    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
        round, DAG.getConstant( 0, INTVT ) );
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); + res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround ); + return DAG.getNode(ISDBITCAST, DL, LHSVT, res); + } +} +SDValue +AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const +{ + SDValue RHS = Op.getOperand(0); + EVT RHSVT = RHS.getValueType(); + MVT RST = RHSVT.getScalarType().getSimpleVT(); + EVT LHSVT = Op.getValueType(); + MVT LST = LHSVT.getScalarType().getSimpleVT(); + DebugLoc DL = Op.getDebugLoc(); + SDValue DST; + EVT INTVT; + EVT LONGVT; + const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl()); + if (LST == MVT::f64 && LHSVT.isVector() + && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + // We dont support vector 64bit floating point convertions. + DST = Op; + for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { + SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); + op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); + if (!x) { + DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); + } else { + DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, + op, DAG.getTargetConstant(x, MVT::i32)); + } + + } + } else { + + if (RST == MVT::i32 + && LST == MVT::f64) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = SDValue(Op.getNode(), 0); + } else { + DST = genu32tof64(RHS, LHSVT, DAG); + } + } else if (RST == MVT::i64 + && LST == MVT::f64) { + DST = genu64tof64(RHS, LHSVT, DAG); + } else { + DST = SDValue(Op.getNode(), 0); + } + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const +{ + SDValue RHS = Op.getOperand(0); + EVT RHSVT = RHS.getValueType(); + MVT RST = RHSVT.getScalarType().getSimpleVT(); + EVT INTVT; + EVT LONGVT; + SDValue DST; + bool isVec = 
RHSVT.isVector(); + DebugLoc DL = Op.getDebugLoc(); + EVT LHSVT = Op.getValueType(); + MVT LST = LHSVT.getScalarType().getSimpleVT(); + const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl()); + if (LST == MVT::f64 && LHSVT.isVector() + && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + // We dont support vector 64bit floating point convertions. + for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { + SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); + op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); + if (!x) { + DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); + } else { + DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, + op, DAG.getTargetConstant(x, MVT::i32)); + } + + } + } else { + + if (isVec) { + LONGVT = EVT(MVT::getVectorVT(MVT::i64, + RHSVT.getVectorNumElements())); + INTVT = EVT(MVT::getVectorVT(MVT::i32, + RHSVT.getVectorNumElements())); + } else { + LONGVT = EVT(MVT::i64); + INTVT = EVT(MVT::i32); + } + MVT RST = RHSVT.getScalarType().getSimpleVT(); + if ((RST == MVT::i32 || RST == MVT::i64) + && LST == MVT::f64) { + if (RST == MVT::i32) { + if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + DST = SDValue(Op.getNode(), 0); + return DST; + } + } + SDValue c31 = DAG.getConstant( 31, INTVT ); + SDValue cSbit = DAG.getConstant( 0x80000000, INTVT ); + + SDValue S; // Sign, as 0 or -1 + SDValue Sbit; // Sign bit, as one bit, MSB only. + if (RST == MVT::i32) { + Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit ); + S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 ); + } else { // 64-bit case... SRA of 64-bit values is slow + SDValue hi = DAG.getNode( (isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS ); + Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit ); + SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 ); + S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp ); + } + + // get abs() of input value, given sign as S (0 or -1) + // SpI = RHS + S + SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S); + // SpIxS = SpI ^ S + SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S); + + // Convert unsigned value to double precision + SDValue R; + if (RST == MVT::i32) { + // r = cast_u32_to_f64(SpIxS) + R = genu32tof64(SpIxS, LHSVT, DAG); + } else { + // r = cast_u64_to_f64(SpIxS) + R = genu64tof64(SpIxS, LHSVT, DAG); + } + + // drop in the sign bit + SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R ); + SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t ); + SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t ); + thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit ); + t = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi ); + DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t ); + } else { + DST = SDValue(Op.getNode(), 0); + } + } + return DST; +} +SDValue +AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const +{ + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue DST; + bool isVec = RHS.getValueType().isVector(); + if (OVT.getScalarType() == MVT::i64) { + /*const AMDILTargetMachine* + amdtm = reinterpret_cast<const AMDILTargetMachine*> + (&this->getTargetMachine()); + const AMDILSubtarget* + stm = dynamic_cast<const AMDILSubtarget*>( + amdtm->getSubtargetImpl());*/ + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i64) { + INTTY = MVT::v2i32; + } + SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; + // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 + LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); + RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); + LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); + RHSHI = DAG.getNode((isVec) ? 
        AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow detection: LHSLO <u RHSLO yields an all-ones (-1) CMP result,
    // which is then ADDed into the high half (i.e. subtracts 1 on borrow).
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: the borrow must be computed per lane, so extract both
      // 32-bit lanes of each low half and compare them individually.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
// Dispatch FDIV by scalar type to the width-specific helper.
SDValue
AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::f64) {
    DST = LowerFDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::f32) {
    DST = LowerFDIV32(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Dispatch SDIV by scalar type; i8/i16 share the 24-bit helper.
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Dispatch UDIV by scalar type; i8/i16 share the 24-bit helper.
SDValue
AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerUDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Dispatch SREM by scalar type to the width-specific helper.
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Dispatch UREM by scalar type to the width-specific helper.
SDValue
AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT =
    Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerUREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerUREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower 64-bit (scalar or v2i64) multiply via four 32-bit partial
// products: low = lo*lo (low part); high = hi0*lo1 + hi1*lo0 + mulhi(lo,lo).
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower BUILD_VECTOR: splat operand 0 with VBUILD, then insert the
// remaining non-undef operands.  A fully-uniform vector is just the splat.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
  // The cases below deliberately fall through (4 -> 3 -> 2) so that every
  // element from the last down to the second gets inserted.
  // NOTE(review): the insert indices 7/6/5 look like an encoded index
  // scheme rather than plain element numbers -- confirm against how the
  // instruction selector consumes INSERT_VECTOR_ELT constants here.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

// Lower INSERT_VECTOR_ELT to AMDILISD::VINSERT.  A constant index becomes
// a single VINSERT with the matching swizzle masks; a dynamic index builds
// one VINSERT per element and selects the right one with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Use the inserted scalar if it is defined, otherwise fall back to the
  // vector operand itself.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static insertion: mask2 clears the target byte lane of the 1-based
    // component selector 0x04030201; mask3 marks the lane being written.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic insertion: generate the insert for every position and pick
    // the one whose position equals the runtime index.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}

// Lower EXTRACT_VECTOR_ELT to AMDILISD::VEXTRACT.  VEXTRACT indices are
// 1-based here (constant path uses getZExtValue() + 1).
SDValue
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t swizzleNum = 0;
  DebugLoc DL = Op.getDebugLoc();
  SDValue Res;
  if (!Op.getOperand(0).getValueType().isVector()) {
    Res = Op.getOperand(0);
    return Res;
  }
  if (CSDN) {
    // Static vector extraction
    swizzleNum = CSDN->getZExtValue() + 1;
    Res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT,
        Op.getOperand(0),
        DAG.getTargetConstant(swizzleNum, MVT::i32));
  } else {
    // Dynamic extraction: start with element 1 (first lane) and CMOVLOG in
    // each later lane whose position matches the runtime index.
    // NOTE(review): the static path maps logical index i to VEXTRACT i+1,
    // but this loop compares the 0-based runtime index Op1 against the
    // 1-based x -- looks like a possible off-by-one; verify VEXTRACT
    // index semantics before relying on the dynamic path.
    SDValue Op1 = Op.getOperand(1);
    uint32_t vecSize = 4;
    SDValue Op0 = Op.getOperand(0);
    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT, Op0,
        DAG.getTargetConstant(1, MVT::i32));
    if (Op0.getValueType().isVector()) {
      vecSize = Op0.getValueType().getVectorNumElements();
    }
    for (uint32_t x = 2; x <= vecSize; ++x) {
      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
          DL, VT, Op0,
          DAG.getTargetConstant(x, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP,
          DL, Op1.getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op1, DAG.getConstant(x, MVT::i32));
      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
          VT, c, t, res);

    }
    Res = res;
  }
  return Res;
}

// Lower EXTRACT_SUBVECTOR by extracting each element at offset+x and
// rebuilding the result with VBUILD + INSERT_VECTOR_ELT.
SDValue
AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
    SelectionDAG &DAG) const
{
  uint32_t vecSize = Op.getValueType().getVectorNumElements();
  SDValue src = Op.getOperand(0);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t offset = 0;
  EVT vecType = Op.getValueType().getVectorElementType();
  DebugLoc DL = Op.getDebugLoc();
  SDValue Result;
  if (CSDN) {
    offset = CSDN->getZExtValue();
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, DAG.getConstant(offset + x, MVT::i32));
      if (elt.getOpcode() != ISD::UNDEF) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + Op.getValueType(), Result, elt, + DAG.getConstant(x, MVT::i32)); + } + } + } else { + SDValue idx = Op.getOperand(1); + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, vecType, src, idx); + Result = DAG.getNode(AMDILISD::VBUILD, DL, + Op.getValueType(), Result); + for (uint32_t x = 1; x < vecSize; ++x) { + idx = DAG.getNode(ISD::ADD, DL, vecType, + idx, DAG.getConstant(1, MVT::i32)); + SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, + src, idx); + if (elt.getOpcode() != ISD::UNDEF) { + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + Op.getValueType(), Result, elt, idx); + } + } + } + return Result; +} +SDValue +AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const +{ + SDValue Res = DAG.getNode(AMDILISD::VBUILD, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0)); + return Res; +} +SDValue +AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const +{ + SDValue andOp; + andOp = DAG.getNode( + AMDILISD::AND, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1)); + return andOp; +} +SDValue +AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const +{ + SDValue orOp; + orOp = DAG.getNode(AMDILISD::OR, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1)); + return orOp; +} +SDValue +AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Cond = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + Cond = getConversionNode(DAG, Cond, Op, true); + Cond = DAG.getNode(AMDILISD::CMOVLOG, + DL, + Op.getValueType(), Cond, LHS, RHS); + return Cond; +} +SDValue +AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Cond; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TRUE = Op.getOperand(2); + SDValue FALSE = Op.getOperand(3); + SDValue CC = 
    Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;
  bool genINot = false;
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        // select(c, -1, 0) is just the comparison result itself
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        // select(c, 0, -1) is the inverted comparison result
        skipCMov = true;
        genINot = true;
      }
    }
  }
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
// Lower ISD::SETCC: emit the target CMP, normalize it, and mask it down
// to the single 0/1 result bit that SETCC producers expect.
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

// Lower SIGN_EXTEND_INREG with the classic shift-left / arithmetic
// shift-right pair; sub-32-bit values are widened to 32 bits first so
// the shifts operate on a full machine word.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
// Build an i32- or i64-based EVT wide enough to hold size*numEle bits
// (scalar when one element suffices, otherwise a vector).
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ?
      6 : 5)); // divide total bits by 64 or 32 to get the element count
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

// Lower ISD::BITCAST.  FP sources are first bit-converted to integers;
// the sub-32-bit integer cases are then repacked by hand because the
// backend cannot represent the intermediate vector types directly.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        // NOTE(review): the result type here is scalar MVT::i64, yet the
        // opcode is still selected by isVec (the SOURCE vector-ness) --
        // confirm LCREATE2 is valid with a scalar i64 result.
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // non-power-of-two scalar size: fall through to plain BITCONV below
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x <
            SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        // Mask each widened element down to its original width, then
        // shift-and-OR groups of 2 (or 4 for i8 sources) elements into
        // each 32-bit (or wider) destination lane.
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          // i8 sources pack four elements per lane: combine the pair
          // results at indices 0,4,8,... once more.
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2: split each wide source element into mult narrow pieces
        // by shifting, then truncate the rebuilt vector to the dest type.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3: pair up i8 elements into i16 lanes (low byte OR
        // high byte << 8); pair results live at even indices.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          // numEle[2] holds the second combined pair (see loop above).
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4: split each i16 element into low byte and (shifted)
        // high byte, rebuild as an i16 vector, then truncate to i8.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Default: a straight bit-preserving conversion node.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}

// Lower DYNAMIC_STACKALLOC: bump the SP register by Size and return the
// previous top of stack together with the updated chain.
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDIL::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2 ,DL);
  return Chain;
}
// Lower ISD::BRCOND to the target conditional-branch node.
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1)); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue JumpT = Op.getOperand(4); + SDValue CmpValue; + ISD::CondCode CC = CCNode->get(); + SDValue Result; + unsigned int cmpOpcode = CondCCodeToCC( + CC, + LHS.getValueType().getSimpleVT().SimpleTy); + CmpValue = DAG.getNode( + AMDILISD::CMP, + Op.getDebugLoc(), + LHS.getValueType(), + DAG.getConstant(cmpOpcode, MVT::i32), + LHS, RHS); + Result = DAG.getNode( + AMDILISD::BRANCH_COND, + CmpValue.getDebugLoc(), + MVT::Other, Chain, + JumpT, CmpValue); + return Result; +} + +SDValue +AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Result = DAG.getNode( + AMDILISD::DP_TO_FP, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1)); + return Result; +} + +SDValue +AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Result = DAG.getNode( + AMDILISD::VCONCAT, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1)); + return Result; +} +// LowerRET - Lower an ISD::RET node. 
+SDValue +AMDILTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) +const +{ + //MachineFunction& MF = DAG.getMachineFunction(); + // CCValAssign - represent the assignment of the return value + // to a location + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { + if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { + MRI.addLiveOut(RVLocs[i].getLocReg()); + } + } + // FIXME: implement this when tail call is implemented + // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); + // both x86 and ppc implement this in ISelLowering + + // Regular return here + SDValue Flag; + SmallVector<SDValue, 6> RetOps; + RetOps.push_back(Chain); + RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); + for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + SDValue ValToCopy = OutVals[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + // ISD::Ret => ret chain, (regnum1, val1), ... 
+ // So i * 2 + 1 index only the regnums + Chain = DAG.getCopyToReg(Chain, + dl, + VA.getLocReg(), + ValToCopy, + Flag); + // guarantee that all emitted copies are stuck together + // avoiding something bad + Flag = Chain.getValue(1); + } + /*if (MF.getFunction()->hasStructRetAttr()) { + assert(0 && "Struct returns are not yet implemented!"); + // Both MIPS and X86 have this + }*/ + RetOps[0] = Chain; + if (Flag.getNode()) + RetOps.push_back(Flag); + + Flag = DAG.getNode(AMDILISD::RET_FLAG, + dl, + MVT::Other, &RetOps[0], RetOps.size()); + return Flag; +} +void +AMDILTargetLowering::generateLongRelational(MachineInstr *MI, + unsigned int opCode) const +{ + MachineOperand DST = MI->getOperand(0); + MachineOperand LHS = MI->getOperand(2); + MachineOperand RHS = MI->getOperand(3); + unsigned int opi32Code = 0, si32Code = 0; + unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass; + uint32_t REGS[12]; + // All the relationals can be generated with with 6 temp registers + for (int x = 0; x < 12; ++x) { + REGS[x] = genVReg(simpleVT); + } + // Pull out the high and low components of each 64 bit register + generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg()); + generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg()); + generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg()); + generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg()); + // Determine the correct opcode that we should use + switch(opCode) { + default: + assert(!"comparison case not handled!"); + break; + case AMDIL::LEQ: + si32Code = opi32Code = AMDIL::IEQ; + break; + case AMDIL::LNE: + si32Code = opi32Code = AMDIL::INE; + break; + case AMDIL::LLE: + case AMDIL::ULLE: + case AMDIL::LGE: + case AMDIL::ULGE: + if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) { + std::swap(REGS[0], REGS[2]); + } else { + std::swap(REGS[1], REGS[3]); + } + if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) { + opi32Code = AMDIL::ILT; + } else { + opi32Code = AMDIL::ULT; + } + si32Code = AMDIL::UGE; + break; + case AMDIL::LGT: + 
case AMDIL::ULGT: + std::swap(REGS[0], REGS[2]); + std::swap(REGS[1], REGS[3]); + case AMDIL::LLT: + case AMDIL::ULLT: + if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) { + opi32Code = AMDIL::ILT; + } else { + opi32Code = AMDIL::ULT; + } + si32Code = AMDIL::ULT; + break; + }; + // Do the initial opcode on the high and low components. + // This leaves the following: + // REGS[4] = L_HI OP R_HI + // REGS[5] = L_LO OP R_LO + generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]); + generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]); + switch(opi32Code) { + case AMDIL::IEQ: + case AMDIL::INE: + { + // combine the results with an and or or depending on if + // we are eq or ne + uint32_t combineOp = (opi32Code == AMDIL::IEQ) + ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32; + generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]); + } + break; + default: + // this finishes codegen for the following pattern + // REGS[4] || (REGS[5] && (L_HI == R_HI)) + generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]); + generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5], + REGS[9]); + generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4], + REGS[10]); + break; + } + generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]); +} + +unsigned int +AMDILTargetLowering::getFunctionAlignment(const Function *) const +{ + return 0; +} + +void +AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB, + MachineBasicBlock::iterator &BBI, + DebugLoc *DL, const TargetInstrInfo *TII) const +{ + mBB = BB; + mBBI = BBI; + mDL = DL; + mTII = TII; +} +uint32_t +AMDILTargetLowering::genVReg(uint32_t regType) const +{ + return mBB->getParent()->getRegInfo().createVirtualRegister( + getRegClassFromID(regType)); +} + +MachineInstrBuilder +AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const +{ + return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst); +} + +MachineInstrBuilder +AMDILTargetLowering::generateMachineInst(uint32_t 
opcode, uint32_t dst, + uint32_t src1) const +{ + return generateMachineInst(opcode, dst).addReg(src1); +} + +MachineInstrBuilder +AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, + uint32_t src1, uint32_t src2) const +{ + return generateMachineInst(opcode, dst, src1).addReg(src2); +} + +MachineInstrBuilder +AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, + uint32_t src1, uint32_t src2, uint32_t src3) const +{ + return generateMachineInst(opcode, dst, src1, src2).addReg(src3); +} + + +SDValue +AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + MVT INTTY; + MVT FLTTY; + if (!OVT.isVector()) { + INTTY = MVT::i32; + FLTTY = MVT::f32; + } else if (OVT.getVectorNumElements() == 2) { + INTTY = MVT::v2i32; + FLTTY = MVT::v2f32; + } else if (OVT.getVectorNumElements() == 4) { + INTTY = MVT::v4i32; + FLTTY = MVT::v4f32; + } + unsigned bitsize = OVT.getScalarType().getSizeInBits(); + // char|short jq = ia ^ ib; + SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); + + // jq = jq >> (bitsize - 2) + jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); + + // jq = jq | 0x1 + jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); + + // jq = (int)jq + jq = DAG.getSExtOrTrunc(jq, DL, INTTY); + + // int ia = (int)LHS; + SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); + + // int ib, (int)RHS; + SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); + + // float fa = (float)ia; + SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); + + // float fb = (float)ib; + SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); + + // float fq = native_divide(fa, fb); + SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); + + // fq = trunc(fq); + fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); + + // float fqneg = -fq; + SDValue fqneg = 
DAG.getNode(ISD::FNEG, DL, FLTTY, fq); + + // float fr = mad(fqneg, fb, fa); + SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); + + // int iq = (int)fq; + SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); + + // fr = fabs(fr); + fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); + + // fb = fabs(fb); + fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); + + // int cv = fr >= fb; + SDValue cv; + if (INTTY == MVT::i32) { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } else { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } + // jq = (cv ? jq : 0); + jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, + DAG.getConstant(0, OVT)); + // dst = iq + jq; + iq = DAG.getSExtOrTrunc(iq, DL, OVT); + iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); + return iq; +} + +SDValue +AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSDIV32 function generates equivalent to the following IL. 
+ // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r0, r0, r1 + // ixor r10, r10, r11 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r0, DAG.getConstant(0, OVT)); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r1, DAG.getConstant(0, OVT)); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r0, r0, r1 + r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); + + // ixor r10, r10, r11 + r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} + +SDValue +AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + MVT INTTY; + MVT FLTTY; + if (!OVT.isVector()) { + INTTY = MVT::i32; + FLTTY = MVT::f32; + } else if (OVT.getVectorNumElements() == 2) { + INTTY = MVT::v2i32; + FLTTY = MVT::v2f32; + } else if (OVT.getVectorNumElements() == 4) { + INTTY = MVT::v4i32; + FLTTY = MVT::v4f32; + } + + // The LowerUDIV24 function implements the following CL. 
+ // int ia = (int)LHS + // float fa = (float)ia + // int ib = (int)RHS + // float fb = (float)ib + // float fq = native_divide(fa, fb) + // fq = trunc(fq) + // float t = mad(fq, fb, fb) + // int iq = (int)fq - (t <= fa) + // return (type)iq + + // int ia = (int)LHS + SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); + + // float fa = (float)ia + SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); + + // int ib = (int)RHS + SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); + + // float fb = (float)ib + SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); + + // float fq = native_divide(fa, fb) + SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); + + // fq = trunc(fq) + fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); + + // float t = mad(fq, fb, fb) + SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); + + // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 + SDValue iq; + fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); + if (INTTY == MVT::i32) { + iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); + } else { + iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); + } + iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); + + + // return (type)iq + iq = DAG.getZExtOrTrunc(iq, DL, OVT); + return iq; + +} + +SDValue +AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} + +SDValue +AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} +SDValue +AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i8) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i8) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = 
DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i16) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i16) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSREM32 function generates equivalent to the following IL. + // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r20, r0, r1 + // umul r20, r20, r1 + // sub r0, r0, r20 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r0, DAG.getConstant(0, OVT)); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r1, DAG.getConstant(0, OVT)); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r20, r0, r1 + SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); + + // umul r20, r20, r1 + r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, 
r20, r1); + + // sub r0, r0, r20 + r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} + +SDValue +AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i8) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i8) { + INTTY = MVT::v4i32; + } + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerUREM8 function generates equivalent to the following IL. + // mov r0, as_u32(LHS) + // mov r1, as_u32(RHS) + // and r10, r0, 0xFF + // and r11, r1, 0xFF + // cmov_logical r3, r11, r11, 0x1 + // udiv r3, r10, r3 + // cmov_logical r3, r11, r3, 0 + // umul r3, r3, r11 + // sub r3, r10, r3 + // and as_u8(DST), r3, 0xFF + + // mov r0, as_u32(LHS) + SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); + + // mov r1, as_u32(RHS) + SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); + + // and r10, r0, 0xFF + SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, + DAG.getConstant(0xFF, INTTY)); + + // and r11, r1, 0xFF + SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, + DAG.getConstant(0xFF, INTTY)); + + // cmov_logical r3, r11, r11, 0x1 + SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, + DAG.getConstant(0x01, INTTY)); + + // udiv r3, r10, r3 + r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); + + // cmov_logical r3, r11, r3, 0 + r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, + DAG.getConstant(0, INTTY)); + + // umul r3, r3, r11 + r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); + + // sub r3, r10, r3 + r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); + + // and as_u8(DST), r3, 0xFF + SDValue DST = DAG.getNode(ISD::AND, DL, 
INTTY, r3, + DAG.getConstant(0xFF, INTTY)); + DST = DAG.getZExtOrTrunc(DST, DL, OVT); + return DST; +} + +SDValue +AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i16) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i16) { + INTTY = MVT::v4i32; + } + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerUREM16 function generatest equivalent to the following IL. + // mov r0, LHS + // mov r1, RHS + // DIV = LowerUDIV16(LHS, RHS) + // and r10, r0, 0xFFFF + // and r11, r1, 0xFFFF + // cmov_logical r3, r11, r11, 0x1 + // udiv as_u16(r3), as_u32(r10), as_u32(r3) + // and r3, r3, 0xFFFF + // cmov_logical r3, r11, r3, 0 + // umul r3, r3, r11 + // sub r3, r10, r3 + // and DST, r3, 0xFFFF + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // and r10, r0, 0xFFFF + SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, + DAG.getConstant(0xFFFF, OVT)); + + // and r11, r1, 0xFFFF + SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, + DAG.getConstant(0xFFFF, OVT)); + + // cmov_logical r3, r11, r11, 0x1 + SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, + DAG.getConstant(0x01, OVT)); + + // udiv as_u16(r3), as_u32(r10), as_u32(r3) + r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); + r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); + r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); + r3 = DAG.getZExtOrTrunc(r3, DL, OVT); + r10 = DAG.getZExtOrTrunc(r10, DL, OVT); + + // and r3, r3, 0xFFFF + r3 = DAG.getNode(ISD::AND, DL, OVT, r3, + DAG.getConstant(0xFFFF, OVT)); + + // cmov_logical r3, r11, r3, 0 + r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, + DAG.getConstant(0, OVT)); + // umul r3, r3, r11 + r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); + + // sub r3, r10, r3 + r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); + + // and DST, r3, 0xFFFF + SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, + 
DAG.getConstant(0xFFFF, OVT)); + return DST; +} + +SDValue +AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerUREM32 function generates equivalent to the following IL. + // udiv r20, LHS, RHS + // umul r20, r20, RHS + // sub DST, LHS, r20 + + // udiv r20, LHS, RHS + SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); + + // umul r20, r20, RHS + r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); + + // sub DST, LHS, r20 + SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); + return DST; +} + +SDValue +AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} + + +SDValue +AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2f32) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4f32) { + INTTY = MVT::v4i32; + } + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue DST; + const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( + &this->getTargetMachine())->getSubtargetImpl(); + if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { + // TODO: This doesn't work for vector types yet + // The LowerFDIV32 function generates equivalent to the following + // IL: + // mov r20, as_int(LHS) + // mov r21, as_int(RHS) + // and r30, r20, 0x7f800000 + // and r31, r20, 0x807FFFFF + // and r32, r21, 0x7f800000 + // and r33, r21, 0x807FFFFF + // ieq r40, r30, 0x7F800000 + // ieq r41, r31, 0x7F800000 + // ieq r42, r32, 0 + // ieq r43, r33, 0 + // and r50, r20, 0x80000000 + // and r51, r21, 0x80000000 + // ior r32, r32, 0x3f800000 + // ior r33, r33, 0x3f800000 + // cmov_logical r32, r42, r50, r32 + // cmov_logical r33, r43, r51, r33 + // cmov_logical r32, r40, r20, r32 + // 
cmov_logical r33, r41, r21, r33 + // ior r50, r40, r41 + // ior r51, r42, r43 + // ior r50, r50, r51 + // inegate r52, r31 + // iadd r30, r30, r52 + // cmov_logical r30, r50, 0, r30 + // div_zeroop(infinity) r21, 1.0, r33 + // mul_ieee r20, r32, r21 + // and r22, r20, 0x7FFFFFFF + // and r23, r20, 0x80000000 + // ishr r60, r22, 0x00000017 + // ishr r61, r30, 0x00000017 + // iadd r20, r20, r30 + // iadd r21, r22, r30 + // iadd r60, r60, r61 + // ige r42, 0, R60 + // ior r41, r23, 0x7F800000 + // ige r40, r60, 0x000000FF + // cmov_logical r40, r50, 0, r40 + // cmov_logical r20, r42, r23, r20 + // cmov_logical DST, r40, r41, r20 + // as_float(DST) + + // mov r20, as_int(LHS) + SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); + + // mov r21, as_int(RHS) + SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); + + // and r30, r20, 0x7f800000 + SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, + DAG.getConstant(0x7F800000, INTTY)); + + // and r31, r21, 0x7f800000 + SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, + DAG.getConstant(0x7f800000, INTTY)); + + // and r32, r20, 0x807FFFFF + SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, + DAG.getConstant(0x807FFFFF, INTTY)); + + // and r33, r21, 0x807FFFFF + SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, + DAG.getConstant(0x807FFFFF, INTTY)); + + // ieq r40, r30, 0x7F800000 + SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), + R30, DAG.getConstant(0x7F800000, INTTY)); + + // ieq r41, r31, 0x7F800000 + SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), + R31, DAG.getConstant(0x7F800000, INTTY)); + + // ieq r42, r30, 0 + SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), + R30, DAG.getConstant(0, INTTY)); + + // ieq r43, r31, 0 + SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + 
DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), + R31, DAG.getConstant(0, INTTY)); + + // and r50, r20, 0x80000000 + SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, + DAG.getConstant(0x80000000, INTTY)); + + // and r51, r21, 0x80000000 + SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, + DAG.getConstant(0x80000000, INTTY)); + + // ior r32, r32, 0x3f800000 + R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, + DAG.getConstant(0x3F800000, INTTY)); + + // ior r33, r33, 0x3f800000 + R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, + DAG.getConstant(0x3F800000, INTTY)); + + // cmov_logical r32, r42, r50, r32 + R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); + + // cmov_logical r33, r43, r51, r33 + R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); + + // cmov_logical r32, r40, r20, r32 + R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); + + // cmov_logical r33, r41, r21, r33 + R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); + + // ior r50, r40, r41 + R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); + + // ior r51, r42, r43 + R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); + + // ior r50, r50, r51 + R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); + + // inegate r52, r31 + SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); + + // iadd r30, r30, r52 + R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); + + // cmov_logical r30, r50, 0, r30 + R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, + DAG.getConstant(0, INTTY), R30); + + // div_zeroop(infinity) r21, 1.0, as_float(r33) + R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); + R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, + DAG.getConstantFP(1.0f, OVT), R33); + + // mul_ieee as_int(r20), as_float(r32), r21 + R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); + R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); + R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); + + // div_zeroop(infinity) r21, 1.0, as_float(r33) + R33 = DAG.getNode(ISDBITCAST, DL, OVT, 
R33); + R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, + DAG.getConstantFP(1.0f, OVT), R33); + + // mul_ieee as_int(r20), as_float(r32), r21 + R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); + R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); + R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); + + // and r22, r20, 0x7FFFFFFF + SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, + DAG.getConstant(0x7FFFFFFF, INTTY)); + + // and r23, r20, 0x80000000 + SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, + DAG.getConstant(0x80000000, INTTY)); + + // ishr r60, r22, 0x00000017 + SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, + DAG.getConstant(0x00000017, INTTY)); + + // ishr r61, r30, 0x00000017 + SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, + DAG.getConstant(0x00000017, INTTY)); + + // iadd r20, r20, r30 + R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); + + // iadd r21, r22, r30 + R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); + + // iadd r60, r60, r61 + R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); + + // ige r42, 0, R60 + R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), + DAG.getConstant(0, INTTY), + R60); + + // ior r41, r23, 0x7F800000 + R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, + DAG.getConstant(0x7F800000, INTTY)); + + // ige r40, r60, 0x000000FF + R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, + DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), + R60, + DAG.getConstant(0x0000000FF, INTTY)); + + // cmov_logical r40, r50, 0, r40 + R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, + DAG.getConstant(0, INTTY), + R40); + + // cmov_logical r20, r42, r23, r20 + R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); + + // cmov_logical DST, r40, r41, r20 + DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); + + // as_float(DST) + DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); + } else { + // The following sequence of DAG nodes produce the following IL: + // fabs 
r1, RHS + // lt r2, 0x1.0p+96f, r1 + // cmov_logical r3, r2, 0x1.0p-23f, 1.0f + // mul_ieee r1, RHS, r3 + // div_zeroop(infinity) r0, LHS, r1 + // mul_ieee DST, r0, r3 + + // fabs r1, RHS + SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); + // lt r2, 0x1.0p+96f, r1 + SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), + DAG.getConstant(0x6f800000, INTTY), r1); + // cmov_logical r3, r2, 0x1.0p-23f, 1.0f + SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, + DAG.getConstant(0x2f800000, INTTY), + DAG.getConstant(0x3f800000, INTTY)); + // mul_ieee r1, RHS, r3 + r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); + // div_zeroop(infinity) r0, LHS, r1 + SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); + // mul_ieee DST, r0, r3 + DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} diff --git a/lib/Target/AMDIL/AMDILISelLowering.h b/lib/Target/AMDIL/AMDILISelLowering.h new file mode 100644 index 00000000000..302f0cb6909 --- /dev/null +++ b/lib/Target/AMDIL/AMDILISelLowering.h @@ -0,0 +1,527 @@ +//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file defines the interfaces that AMDIL uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDIL_ISELLOWERING_H_ +#define AMDIL_ISELLOWERING_H_ +#include "AMDIL.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm +{ + namespace AMDILISD + { + enum + { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + INTTOANY, // Dummy instruction that takes an int and goes to + // any type converts the SDNode to an int + DP_TO_FP, // Conversion from 64bit FP to 32bit FP + FP_TO_DP, // Conversion from 32bit FP to 64bit FP + BITCONV, // instruction that converts from any type to any type + CMOV, // 32bit FP Conditional move instruction + CMOVLOG, // 32bit FP Conditional move logical instruction + SELECT, // 32bit FP Conditional move logical instruction + SETCC, // 32bit FP Conditional move logical instruction + ISGN, // 32bit Int Sign instruction + INEGATE, // 32bit Int Negation instruction + MAD, // 32bit Fused Multiply Add instruction + ADD, // 32/64 bit pseudo instruction + AND, // 128 bit and instruction + OR, // 128 bit or instruction + NOT, // 128 bit not instruction + XOR, // 128 bit xor instruction + MOVE, // generic mov instruction + PHIMOVE, // generic phi-node mov instruction + VBUILD, // scalar to vector mov instruction + VEXTRACT, // extract vector components + VINSERT, // insert vector components + VCONCAT, // concat a single vector to another vector + UMAD, // 32bit UInt Fused Multiply Add instruction + CALL, // Function call based on a single integer + RET, // Return from a function call + SELECT_CC, // Select the correct conditional instruction + BRCC, // Select the correct branch instruction + CMPCC, // Compare to GPR operands + CMPICC, // Compare two GPR operands, set icc. + CMPFCC, // Compare two FP operands, set fcc. 
+ BRICC, // Branch to dest on icc condition + BRFCC, // Branch to dest on fcc condition + SELECT_ICC, // Select between two values using the current ICC + //flags. + SELECT_FCC, // Select between two values using the current FCC + //flags. + LCREATE, // Create a 64bit integer from two 32 bit integers + LCOMPHI, // Get the hi 32 bits from a 64 bit integer + LCOMPLO, // Get the lo 32 bits from a 64 bit integer + DCREATE, // Create a 64bit float from two 32 bit integers + DCOMPHI, // Get the hi 32 bits from a 64 bit float + DCOMPLO, // Get the lo 32 bits from a 64 bit float + LCREATE2, // Create a 64bit integer from two 32 bit integers + LCOMPHI2, // Get the hi 32 bits from a 64 bit integer + LCOMPLO2, // Get the lo 32 bits from a 64 bit integer + DCREATE2, // Create a 64bit float from two 32 bit integers + DCOMPHI2, // Get the hi 32 bits from a 64 bit float + DCOMPLO2, // Get the lo 32 bits from a 64 bit float + UMUL, // 32bit unsigned multiplication + IFFB_HI, // 32bit find first hi bit instruction + IFFB_LO, // 32bit find first low bit instruction + DIV_INF, // Divide with infinity returned on zero divisor + SMAX, // Signed integer max + CMP, + IL_CC_I_GT, + IL_CC_I_LT, + IL_CC_I_GE, + IL_CC_I_LE, + IL_CC_I_EQ, + IL_CC_I_NE, + RET_FLAG, + BRANCH_COND, + LOOP_NZERO, + LOOP_ZERO, + LOOP_CMP, + ADDADDR, + // ATOMIC Operations + // Global Memory + ATOM_G_ADD = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOM_G_AND, + ATOM_G_CMPXCHG, + ATOM_G_DEC, + ATOM_G_INC, + ATOM_G_MAX, + ATOM_G_UMAX, + ATOM_G_MIN, + ATOM_G_UMIN, + ATOM_G_OR, + ATOM_G_SUB, + ATOM_G_RSUB, + ATOM_G_XCHG, + ATOM_G_XOR, + ATOM_G_ADD_NORET, + ATOM_G_AND_NORET, + ATOM_G_CMPXCHG_NORET, + ATOM_G_DEC_NORET, + ATOM_G_INC_NORET, + ATOM_G_MAX_NORET, + ATOM_G_UMAX_NORET, + ATOM_G_MIN_NORET, + ATOM_G_UMIN_NORET, + ATOM_G_OR_NORET, + ATOM_G_SUB_NORET, + ATOM_G_RSUB_NORET, + ATOM_G_XCHG_NORET, + ATOM_G_XOR_NORET, + // Local Memory + ATOM_L_ADD, + ATOM_L_AND, + ATOM_L_CMPXCHG, + ATOM_L_DEC, + ATOM_L_INC, + ATOM_L_MAX, + 
ATOM_L_UMAX, + ATOM_L_MIN, + ATOM_L_UMIN, + ATOM_L_OR, + ATOM_L_MSKOR, + ATOM_L_SUB, + ATOM_L_RSUB, + ATOM_L_XCHG, + ATOM_L_XOR, + ATOM_L_ADD_NORET, + ATOM_L_AND_NORET, + ATOM_L_CMPXCHG_NORET, + ATOM_L_DEC_NORET, + ATOM_L_INC_NORET, + ATOM_L_MAX_NORET, + ATOM_L_UMAX_NORET, + ATOM_L_MIN_NORET, + ATOM_L_UMIN_NORET, + ATOM_L_OR_NORET, + ATOM_L_MSKOR_NORET, + ATOM_L_SUB_NORET, + ATOM_L_RSUB_NORET, + ATOM_L_XCHG_NORET, + ATOM_L_XOR_NORET, + // Region Memory + ATOM_R_ADD, + ATOM_R_AND, + ATOM_R_CMPXCHG, + ATOM_R_DEC, + ATOM_R_INC, + ATOM_R_MAX, + ATOM_R_UMAX, + ATOM_R_MIN, + ATOM_R_UMIN, + ATOM_R_OR, + ATOM_R_MSKOR, + ATOM_R_SUB, + ATOM_R_RSUB, + ATOM_R_XCHG, + ATOM_R_XOR, + ATOM_R_ADD_NORET, + ATOM_R_AND_NORET, + ATOM_R_CMPXCHG_NORET, + ATOM_R_DEC_NORET, + ATOM_R_INC_NORET, + ATOM_R_MAX_NORET, + ATOM_R_UMAX_NORET, + ATOM_R_MIN_NORET, + ATOM_R_UMIN_NORET, + ATOM_R_OR_NORET, + ATOM_R_MSKOR_NORET, + ATOM_R_SUB_NORET, + ATOM_R_RSUB_NORET, + ATOM_R_XCHG_NORET, + ATOM_R_XOR_NORET, + // Append buffer + APPEND_ALLOC, + APPEND_ALLOC_NORET, + APPEND_CONSUME, + APPEND_CONSUME_NORET, + // 2D Images + IMAGE2D_READ, + IMAGE2D_WRITE, + IMAGE2D_INFO0, + IMAGE2D_INFO1, + // 3D Images + IMAGE3D_READ, + IMAGE3D_WRITE, + IMAGE3D_INFO0, + IMAGE3D_INFO1, + + LAST_ISD_NUMBER + }; + } // AMDILISD + + class MachineBasicBlock; + class MachineInstr; + class DebugLoc; + class TargetInstrInfo; + + class AMDILTargetLowering : public TargetLowering + { + private: + int VarArgsFrameOffset; // Frame offset to start of varargs area. + public: + AMDILTargetLowering(TargetMachine &TM); + + virtual SDValue + LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + int + getVarArgsFrameOffset() const; + + /// computeMaskedBitsForTargetNode - Determine which of + /// the bits specified + /// in Mask are known to be either zero or one and return them in + /// the + /// KnownZero/KnownOne bitsets. 
+ virtual void + computeMaskedBitsForTargetNode( + const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0 + ) const; + + virtual MachineBasicBlock* + EmitInstrWithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const; + + virtual bool + getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const; + virtual const char* + getTargetNodeName( + unsigned Opcode + ) const; + // We want to mark f32/f64 floating point values as + // legal + bool + isFPImmLegal(const APFloat &Imm, EVT VT) const; + // We don't want to shrink f64/f32 constants because + // they both take up the same amount of space and + // we don't want to use a f2d instruction. + bool ShouldShrinkFPConstant(EVT VT) const; + + /// getFunctionAlignment - Return the Log2 alignment of this + /// function. + virtual unsigned int + getFunctionAlignment(const Function *F) const; + + private: + CCAssignFn* + CCAssignFnForNode(unsigned int CC) const; + + SDValue LowerCallResult(SDValue Chain, + SDValue InFlag, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerMemArgument(SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl<ISD::InputArg> &ArgInfo, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, MachineFrameInfo *MFI, + unsigned i) const; + + SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, + SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, + bool &isTailCall, + const 
SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + //+++--- Function dealing with conversions between floating point and + //integer types ---+++// + SDValue + genCLZu64(SDValue Op, SelectionDAG &DAG) const; + SDValue + genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const; + SDValue + genCLZu32(SDValue Op, SelectionDAG &DAG) const; + SDValue + genf64toi32(SDValue Op, SelectionDAG &DAG, + bool includeSign) const; + + SDValue + genf64toi64(SDValue Op, SelectionDAG &DAG, + bool includeSign) const; + + SDValue + genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const; + + SDValue + genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const; + + SDValue + LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG& DAG) const; + + SDValue + LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG& DAG) const; + + SDValue + LowerINTRINSIC_VOID(SDValue Op, SelectionDAG& DAG) const; + + SDValue + LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerADD(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSUB(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSREM(SDValue Op, SelectionDAG &DAG) const; + 
SDValue + LowerSREM8(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM16(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerUREM(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUREM8(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUREM16(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUREM32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUREM64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerUDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUDIV24(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerUDIV64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerFDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerMUL(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerAND(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + + 
SDValue + LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + + EVT + genIntType(uint32_t size = 32, uint32_t numEle = 1) const; + + SDValue + LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + void + generateCMPInstr(MachineInstr*, MachineBasicBlock*, + const TargetInstrInfo&) const; + MachineOperand + convertToReg(MachineOperand) const; + + // private members used by the set of instruction generation + // functions, these are marked mutable as they are cached so + // that they don't have to constantly be looked up when using the + // generateMachineInst/genVReg instructions. This is to simplify + // the code + // and to make it cleaner. The object itself doesn't change as + // only these functions use these three data types. 
+ mutable MachineBasicBlock *mBB; + mutable DebugLoc *mDL; + mutable const TargetInstrInfo *mTII; + mutable MachineBasicBlock::iterator mBBI; + void + setPrivateData(MachineBasicBlock *BB, + MachineBasicBlock::iterator &BBI, + DebugLoc *DL, + const TargetInstrInfo *TII) const; + uint32_t genVReg(uint32_t regType) const; + MachineInstrBuilder + generateMachineInst(uint32_t opcode, + uint32_t dst) const; + MachineInstrBuilder + generateMachineInst(uint32_t opcode, + uint32_t dst, uint32_t src1) const; + MachineInstrBuilder + generateMachineInst(uint32_t opcode, + uint32_t dst, uint32_t src1, uint32_t src2) const; + MachineInstrBuilder + generateMachineInst(uint32_t opcode, + uint32_t dst, uint32_t src1, uint32_t src2, + uint32_t src3) const; + uint32_t + addExtensionInstructions( + uint32_t reg, bool signedShift, + unsigned int simpleVT) const; + void + generateLongRelational(MachineInstr *MI, + unsigned int opCode) const; + + }; // AMDILTargetLowering +} // end namespace llvm + +#endif // AMDIL_ISELLOWERING_H_ diff --git a/lib/Target/AMDIL/AMDILInstrInfo.cpp b/lib/Target/AMDIL/AMDILInstrInfo.cpp new file mode 100644 index 00000000000..fbc3e45b357 --- /dev/null +++ b/lib/Target/AMDIL/AMDILInstrInfo.cpp @@ -0,0 +1,709 @@ +//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// +#include "AMDILInstrInfo.h" +#include "AMDILUtilityFunctions.h" + +#define GET_INSTRINFO_CTOR +#include "AMDILGenInstrInfo.inc" + +#include "AMDILInstrInfo.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Instructions.h" + +using namespace llvm; + +AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm) + : AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP), + RI(tm, *this), + TM(tm) { +} + +const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const { + return RI; +} + +/// Return true if the instruction is a register to register move and leave the +/// source and dest operands in the passed parameters. +bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, + unsigned int &DstReg, unsigned int &SrcSubIdx, + unsigned int &DstSubIdx) const { + // FIXME: we should look for: + // add with 0 + //assert(0 && "is Move Instruction has not been implemented yet!"); + //return true; + if (!isMove(MI.getOpcode())) { + return false; + } + if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) { + return false; + } + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + DstSubIdx = 0; + SrcSubIdx = 0; + return true; +} + +bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { +// TODO: Implement this function + return false; +} + +unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +bool 
AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} +unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} +#if 0 +void +AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { +// TODO: Implement this function +} + +MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { +// TODO: Implement this function + return NULL; +} +#endif +MachineInstr * +AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { +// TODO: Implement this function + return NULL; +} +#if 0 +MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI = false) const { +// TODO: Implement this function + return NULL; +} +bool +AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const +{ +// TODO: Implement this function +} +bool +AMDILInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const +{ +// TODO: Implement this function +} +#endif +bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const { + while (iter != MBB.end()) { + switch (iter->getOpcode()) { + default: + break; + ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND); + case AMDIL::BRANCH: + return true; + }; + 
+    ++iter;
+  }
+  return false;
+}
+
+// Analyze the terminators of MBB and fill in TBB/FBB/Cond per the
+// TargetInstrInfo::AnalyzeBranch contract (return true = "cannot analyze").
+// NOTE(review): the early 'return retVal' below disables the whole analysis,
+// so every block is currently reported as unanalyzable.  Everything after it
+// is dead code, kept for when branch analysis is re-enabled.
+bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const {
+  bool retVal = true;
+  return retVal;  // analysis disabled -- see NOTE above
+  // ---- dead code below this point ----
+  MachineBasicBlock::iterator iter = MBB.begin();
+  if (!getNextBranchInstr(iter, MBB)) {
+    // No branch at all: plain fall-through block.
+    retVal = false;
+  } else {
+    MachineInstr *firstBranch = iter;
+    if (!getNextBranchInstr(++iter, MBB)) {
+      // Exactly one branch instruction in the block.
+      if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+        // Single unconditional branch.
+        TBB = firstBranch->getOperand(0).getMBB();
+        firstBranch->eraseFromParent();
+        retVal = false;
+      } else {
+        // Single conditional branch; the other successor is the fall-through.
+        TBB = firstBranch->getOperand(0).getMBB();
+        FBB = *(++MBB.succ_begin());
+        if (FBB == TBB) {
+          FBB = *(MBB.succ_begin());
+        }
+        Cond.push_back(firstBranch->getOperand(1));
+        retVal = false;
+      }
+    } else {
+      MachineInstr *secondBranch = iter;
+      if (!getNextBranchInstr(++iter, MBB)) {
+        // Two branches: must be conditional followed by unconditional.
+        if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+          TBB = firstBranch->getOperand(0).getMBB();
+          Cond.push_back(firstBranch->getOperand(1));
+          FBB = secondBranch->getOperand(0).getMBB();
+          secondBranch->eraseFromParent();
+          retVal = false;
+        } else {
+          assert(0 && "Should not have two consecutive conditional branches");
+        }
+      } else {
+        MBB.getParent()->viewCFG();
+        assert(0 && "Should not have three branch instructions in"
+               " a single basic block");
+        retVal = false;
+      }
+    }
+  }
+  return retVal;
+}
+
+// Pick the BRANCH_COND_* opcode that matches the register class of the
+// condition operand 'op'.
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
+  const MachineInstr *MI = op.getParent();
+
+  switch (MI->getDesc().OpInfo->RegClass) {
+  default: // FIXME: fallthrough??
+  case AMDIL::GPRI8RegClassID: return AMDIL::BRANCH_COND_i8;
+  case AMDIL::GPRI16RegClassID: return AMDIL::BRANCH_COND_i16;
+  case AMDIL::GPRI32RegClassID: return AMDIL::BRANCH_COND_i32;
+  case AMDIL::GPRI64RegClassID: return AMDIL::BRANCH_COND_i64;
+  case AMDIL::GPRF32RegClassID: return AMDIL::BRANCH_COND_f32;
+  case AMDIL::GPRF64RegClassID: return AMDIL::BRANCH_COND_f64;
+  };
+}
+
+// Insert a branch sequence at the end of MBB; returns the number of
+// instructions inserted.  NOTE(review): the two-branch path falls through
+// to assert(0) after building both branches -- presumably intentional
+// while two-way insertion is unsupported.
+unsigned int
+AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond,
+                             DebugLoc DL) const
+{
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  for (unsigned int x = 0; x < Cond.size(); ++x) {
+    Cond[x].getParent()->dump();
+  }
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      // Unconditional branch to TBB.
+      BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+    } else {
+      // Conditional branch to TBB, fall through otherwise.
+      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+        .addMBB(TBB).addReg(Cond[0].getReg());
+    }
+    return 1;
+  } else {
+    BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+      .addMBB(TBB).addReg(Cond[0].getReg());
+    BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+  }
+  assert(0 && "Inserting two branches not supported");
+  return 0;
+}
+
+// Remove up to two trailing branch instructions from MBB; returns how many
+// were removed.
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) {
+    return 0;
+  }
+  --I;
+  switch (I->getOpcode()) {
+  default:
+    return 0;
+  ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+  case AMDIL::BRANCH:
+    I->eraseFromParent();
+    break;
+  }
+  I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 1;
+  }
+  --I;
+  switch (I->getOpcode()) {
+  // FIXME: only one case??
+  default:
+    return 1;
+  ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+    I->eraseFromParent();
+    break;
+  }
+  return 2;
+}
+
+// Walk backwards from the end of MBB past any trailing flow-control
+// instructions (ENDLOOP/ENDIF/ELSE) and return the position just after the
+// last non-flow-control instruction -- i.e. where an instruction that must
+// precede the flow control should be inserted.  Returns MBB->end() for an
+// empty block or when no such position exists.
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator tmp = MBB->end();
+  if (!MBB->size()) {
+    // Empty block: nothing to skip over.
+    return MBB->end();
+  }
+  while (--tmp) {
+    if (tmp->getOpcode() == AMDIL::ENDLOOP
+        || tmp->getOpcode() == AMDIL::ENDIF
+        || tmp->getOpcode() == AMDIL::ELSE) {
+      if (tmp == MBB->begin()) {
+        // Block is all flow control: insert at the very top.
+        return tmp;
+      } else {
+        continue;
+      }
+    } else {
+      // First non-flow-control instruction found; insert right after it.
+      return ++tmp;
+    }
+  }
+  return MBB->end();
+}
+
+bool
+AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I,
+                             unsigned DestReg, unsigned SrcReg,
+                             const TargetRegisterClass *DestRC,
+                             const TargetRegisterClass *SrcRC,
+                             DebugLoc DL) const {
+  // If we are adding to the end of a basic block we can safely assume that
+  // the move is caused by a PHI node since all move instructions that are
+  // non-PHI have already been inserted into the basic block.  Therefore we
+  // call skipFlowControl to move the iterator before the flow-control
+  // instructions and put the move instruction there.
+  // NOTE(review): registers numbered below 1025 appear to be treated as the
+  // non-PHI (physical) case here -- confirm the threshold, and note that the
+  // ?: below pairs phi==true with getMoveInstFromID rather than the PHI
+  // variant, which looks inverted relative to the flag's name.
+  bool phi = (DestReg < 1025) || (SrcReg < 1025);
+  int movInst = phi ? getMoveInstFromID(DestRC->getID())
+                    : getPHIMoveInstFromID(DestRC->getID());
+
+  MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
+                                                       : I;
+  if (DestRC != SrcRC) {
+    //int convInst;
+    size_t dSize = DestRC->getSize();
+    size_t sSize = SrcRC->getSize();
+    if (dSize > sSize) {
+      // Elements are going to get duplicated.
+      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+    } else if (dSize == sSize) {
+      // Direct copy, conversions are not handled.
+      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+    } else if (dSize < sSize) {
+      // Elements are going to get dropped.
+ BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); + } + } else { + BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); + } + return true; +} +void +AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const +{ + BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; +#if 0 + DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) + << " to " << RI.getName(DestReg) << '\n'); + abort(); +#endif +} +void +AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + // MachineInstr *curMI = MI; + MachineFunction &MF = *(MBB.getParent()); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + DebugLoc DL; + switch (RC->getID()) { + default: + Opc = AMDIL::PRIVATESTORE_v4i32; + break; + case AMDIL::GPRF32RegClassID: + Opc = AMDIL::PRIVATESTORE_f32; + break; + case AMDIL::GPRF64RegClassID: + Opc = AMDIL::PRIVATESTORE_f64; + break; + case AMDIL::GPRI16RegClassID: + Opc = AMDIL::PRIVATESTORE_i16; + break; + case AMDIL::GPRI32RegClassID: + Opc = AMDIL::PRIVATESTORE_i32; + break; + case AMDIL::GPRI8RegClassID: + Opc = AMDIL::PRIVATESTORE_i8; + break; + case AMDIL::GPRI64RegClassID: + Opc = AMDIL::PRIVATESTORE_i64; + break; + case AMDIL::GPRV2F32RegClassID: + Opc = AMDIL::PRIVATESTORE_v2f32; + break; + case AMDIL::GPRV2F64RegClassID: + Opc = AMDIL::PRIVATESTORE_v2f64; + break; + case AMDIL::GPRV2I16RegClassID: + Opc = AMDIL::PRIVATESTORE_v2i16; + break; + case AMDIL::GPRV2I32RegClassID: + Opc = AMDIL::PRIVATESTORE_v2i32; + break; + case AMDIL::GPRV2I8RegClassID: + Opc = AMDIL::PRIVATESTORE_v2i8; + break; + case AMDIL::GPRV2I64RegClassID: + Opc = AMDIL::PRIVATESTORE_v2i64; + break; + case AMDIL::GPRV4F32RegClassID: + Opc = 
AMDIL::PRIVATESTORE_v4f32; + break; + case AMDIL::GPRV4I16RegClassID: + Opc = AMDIL::PRIVATESTORE_v4i16; + break; + case AMDIL::GPRV4I32RegClassID: + Opc = AMDIL::PRIVATESTORE_v4i32; + break; + case AMDIL::GPRV4I8RegClassID: + Opc = AMDIL::PRIVATESTORE_v4i8; + break; + } + if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineMemOperand *MMO = + new MachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIndex) + .addMemOperand(MMO) + .addImm(0); + AMDILAS::InstrResEnc curRes; + curRes.bits.ResourceID + = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); + setAsmPrinterFlags(nMI, curRes); +} + +void +AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + MachineFunction &MF = *(MBB.getParent()); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + DebugLoc DL; + switch (RC->getID()) { + default: + Opc = AMDIL::PRIVATELOAD_v4i32; + break; + case AMDIL::GPRF32RegClassID: + Opc = AMDIL::PRIVATELOAD_f32; + break; + case AMDIL::GPRF64RegClassID: + Opc = AMDIL::PRIVATELOAD_f64; + break; + case AMDIL::GPRI16RegClassID: + Opc = AMDIL::PRIVATELOAD_i16; + break; + case AMDIL::GPRI32RegClassID: + Opc = AMDIL::PRIVATELOAD_i32; + break; + case AMDIL::GPRI8RegClassID: + Opc = AMDIL::PRIVATELOAD_i8; + break; + case AMDIL::GPRI64RegClassID: + Opc = AMDIL::PRIVATELOAD_i64; + break; + case AMDIL::GPRV2F32RegClassID: + Opc = AMDIL::PRIVATELOAD_v2f32; + break; + case AMDIL::GPRV2F64RegClassID: + Opc = AMDIL::PRIVATELOAD_v2f64; + break; + case AMDIL::GPRV2I16RegClassID: + Opc = AMDIL::PRIVATELOAD_v2i16; + break; + 
case AMDIL::GPRV2I32RegClassID: + Opc = AMDIL::PRIVATELOAD_v2i32; + break; + case AMDIL::GPRV2I8RegClassID: + Opc = AMDIL::PRIVATELOAD_v2i8; + break; + case AMDIL::GPRV2I64RegClassID: + Opc = AMDIL::PRIVATELOAD_v2i64; + break; + case AMDIL::GPRV4F32RegClassID: + Opc = AMDIL::PRIVATELOAD_v4f32; + break; + case AMDIL::GPRV4I16RegClassID: + Opc = AMDIL::PRIVATELOAD_v4i16; + break; + case AMDIL::GPRV4I32RegClassID: + Opc = AMDIL::PRIVATELOAD_v4i32; + break; + case AMDIL::GPRV4I8RegClassID: + Opc = AMDIL::PRIVATELOAD_v4i8; + break; + } + + MachineMemOperand *MMO = + new MachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc)) + .addReg(DestReg, RegState::Define) + .addFrameIndex(FrameIndex) + .addMemOperand(MMO) + .addImm(0); + AMDILAS::InstrResEnc curRes; + curRes.bits.ResourceID + = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); + setAsmPrinterFlags(nMI, curRes); + +} +MachineInstr * +AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const { +// TODO: Implement this function + return 0; +} +MachineInstr* +AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) const { + // TODO: Implement this function + return 0; +} +bool +AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const +{ + // TODO: Implement this function + return false; +} +bool +AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, + bool UnfoldStore, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + // TODO: Implement this function + return false; +} + +bool 
+AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode*> &NewNodes) const { + // TODO: Implement this function + return false; +} + +unsigned +AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex) const { + // TODO: Implement this function + return 0; +} + +bool +AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + return false; + if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) { + return false; + } + const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1); + const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2); + if (!mload1 || !mload2) { + return false; + } + if (mload1->memoperands_empty() || + mload2->memoperands_empty()) { + return false; + } + MachineMemOperand *memOp1 = (*mload1->memoperands_begin()); + MachineMemOperand *memOp2 = (*mload2->memoperands_begin()); + const Value *mv1 = memOp1->getValue(); + const Value *mv2 = memOp2->getValue(); + if (!memOp1->isLoad() || !memOp2->isLoad()) { + return false; + } + if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) { + if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) { + const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1); + const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2); + if (!gep1 || !gep2) { + return false; + } + if (gep1->getNumOperands() != gep2->getNumOperands()) { + return false; + } + for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) { + const Value *op1 = gep1->getOperand(i); + const Value *op2 = gep2->getOperand(i); + if (op1 != op2) { + // If any value except the last one is different, return false. 
+ return false; + } + } + unsigned size = gep1->getNumOperands()-1; + if (!isa<ConstantInt>(gep1->getOperand(size)) + || !isa<ConstantInt>(gep2->getOperand(size))) { + return false; + } + Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue(); + Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue(); + return true; + } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) { + return false; + } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) { + return false; + } + } + return false; +} + +bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + assert(Offset2 > Offset1 + && "Second offset should be larger than first offset!"); + // If we have less than 16 loads in a row, and the offsets are within 16, + // then schedule together. + // TODO: Make the loads schedule near if it fits in a cacheline + return (NumLoads < 16 && (Offset2 - Offset1) < 16); +} + +bool +AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const { + // TODO: Implement this function + return true; +} +void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + // TODO: Implement this function +} + +bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const { + // TODO: Implement this function + return false; +} +#if 0 +bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + // TODO: Implement this function +} + +bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + // TODO: Implement this function +} +#endif +bool +AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) + const { + // TODO: Implement this function + return false; +} + +bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // TODO: 
Implement this function + return false; +} + +bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const { + // TODO: Implement this function + return MI->getDesc().isPredicable(); +} + +bool +AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + // TODO: Implement this function + return true; +} + +unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + // TODO: Implement this function + return 0; +} + +#if 0 +unsigned +AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const { + // TODO: Implement this function + return 0; +} + +unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + // TODO: Implement this function + return 0; +} +#endif diff --git a/lib/Target/AMDIL/AMDILInstrInfo.h b/lib/Target/AMDIL/AMDILInstrInfo.h new file mode 100644 index 00000000000..88dd4e9441a --- /dev/null +++ b/lib/Target/AMDIL/AMDILInstrInfo.h @@ -0,0 +1,175 @@ +//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDILINSTRUCTIONINFO_H_ +#define AMDILINSTRUCTIONINFO_H_ + +#include "AMDILRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "AMDILGenInstrInfo.inc" + +namespace llvm { + // AMDIL - This namespace holds all of the target specific flags that + // instruction info tracks. 
+ // + //class AMDILTargetMachine; +class AMDILInstrInfo : public AMDILGenInstrInfo { +private: + const AMDILRegisterInfo RI; + AMDILTargetMachine &TM; + bool getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const; + unsigned int getBranchInstr(const MachineOperand &op) const; +public: + explicit AMDILInstrInfo(AMDILTargetMachine &tm); + + // getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + // such, whenever a client has an instance of instruction info, it should + // always be able to get register info as well (through this method). + const AMDILRegisterInfo &getRegisterInfo() const; + + // Return true if the instruction is a register to register move and leave the + // source and dest operands in the passed parameters. + bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, + unsigned int &DstReg, unsigned int &SrcSubIdx, + unsigned int &DstSubIdx) const; + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + +#if 0 + void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; + MachineInstr *duplicate(MachineInstr *Orig, + MachineFunction &MF) const; +#endif + MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, 
+ LiveVariables *LV) const; +#if 0 + MachineInstr *commuteInstruction(MachineInstr *MI, + bool NewMI = false) const; + bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const; + bool produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const; + +#endif + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + unsigned + InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + +protected: + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) const; +public: + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool 
UnfoldStore, + SmallVectorImpl<MachineInstr *> &NewMIs) const; + bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode *> &NewNodes) const; + unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = 0) const; + bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, int64_t &Offset2) const; + bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + bool isPredicated(const MachineInstr *MI) const; +#if 0 + bool isUnpredicatedTerminator(const MachineInstr *MI) const; + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; +#endif + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + bool isPredicable(MachineInstr *MI) const; + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; +#if 0 + unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; + unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const; +#endif + }; + +} + +#endif // AMDILINSTRINFO_H_ diff --git a/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp b/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp new file mode 100644 index 00000000000..75729ac01a3 --- /dev/null +++ b/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp @@ -0,0 +1,190 @@ +//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL Implementation of the IntrinsicInfo class. +// +//===-----------------------------------------------------------------------===// + +#include "AMDILIntrinsicInfo.h" +#include "AMDIL.h" +#include "AMDILTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" + +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "AMDILGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + +AMDILIntrinsicInfo::AMDILIntrinsicInfo(AMDILTargetMachine *tm) + : TargetIntrinsicInfo(), mTM(tm) +{ +} + +std::string +AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, + unsigned int numTys) const +{ + static const char* const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "AMDILGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + //assert(!isOverloaded(IntrID) + //&& "AMDIL Intrinsics are not overloaded"); + if (IntrID < Intrinsic::num_intrinsics) { + return 0; + } + assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics + && "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + + static bool +checkTruncation(const char *Name, unsigned int& Len) +{ + const char *ptr = Name + (Len - 1); + while(ptr != Name && *ptr != '_') { + --ptr; + } + // We don't want to truncate on atomic instructions + // but we do want to enter the check Truncation + // section so that we can translate the atomic + // instructions if we need to. 
+ if (!strncmp(Name, "__atom", 6)) { + return true; + } + if (strstr(ptr, "i32") + || strstr(ptr, "u32") + || strstr(ptr, "i64") + || strstr(ptr, "u64") + || strstr(ptr, "f32") + || strstr(ptr, "f64") + || strstr(ptr, "i16") + || strstr(ptr, "u16") + || strstr(ptr, "i8") + || strstr(ptr, "u8")) { + Len = (unsigned int)(ptr - Name); + return true; + } + return false; +} + +// We don't want to support both the OpenCL 1.0 atomics +// and the 1.1 atomics with different names, so we translate +// the 1.0 atomics to the 1.1 naming here if needed. +static char* +atomTranslateIfNeeded(const char *Name, unsigned int Len) +{ + char *buffer = NULL; + if (strncmp(Name, "__atom_", 7)) { + // If we are not starting with __atom_, then + // go ahead and continue on with the allocation. + buffer = new char[Len + 1]; + memcpy(buffer, Name, Len); + } else { + buffer = new char[Len + 3]; + memcpy(buffer, "__atomic_", 9); + memcpy(buffer + 9, Name + 7, Len - 7); + Len += 2; + } + buffer[Len] = '\0'; + return buffer; +} + +unsigned int +AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const +{ +#define GET_FUNCTION_RECOGNIZER +#include "AMDILGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + AMDGPUIntrinsic::ID IntrinsicID + = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; + if (checkTruncation(Name, Len)) { + char *buffer = atomTranslateIfNeeded(Name, Len); + IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", buffer); + delete [] buffer; + } else { + IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name); + } + if (!isValidIntrinsic(IntrinsicID)) { + return 0; + } + if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { + return IntrinsicID; + } + return 0; +} + +bool +AMDILIntrinsicInfo::isOverloaded(unsigned id) const +{ + // Overload Table +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "AMDILGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE +} + +/// This defines the "getAttributes(ID id)" method. 
+#define GET_INTRINSIC_ATTRIBUTES +#include "AMDILGenIntrinsics.inc" +#undef GET_INTRINSIC_ATTRIBUTES + +Function* +AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned numTys) const +{ + assert(!isOverloaded(IntrID) && "AMDIL intrinsics are not overloaded"); + AttrListPtr AList = getAttributes((AMDGPUIntrinsic::ID) IntrID); + LLVMContext& Context = M->getContext(); + unsigned int id = IntrID; + Type *ResultTy = NULL; + std::vector<Type*> ArgTys; + bool IsVarArg = false; + +#define GET_INTRINSIC_GENERATOR +#include "AMDILGenIntrinsics.inc" +#undef GET_INTRINSIC_GENERATOR + // We need to add the resource ID argument for atomics. + if (id >= AMDGPUIntrinsic::AMDIL_atomic_add_gi32 + && id <= AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret) { + ArgTys.push_back(IntegerType::get(Context, 32)); + } + + return cast<Function>(M->getOrInsertFunction(getName(IntrID), + FunctionType::get(ResultTy, ArgTys, IsVarArg), + AList)); +} + +/// Because the code generator has to support different SC versions, +/// this function is added to check that the intrinsic being used +/// is actually valid. In the case where it isn't valid, the +/// function call is not translated into an intrinsic and the +/// fall back software emulated path should pick up the result. 
+bool +AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const +{ + const AMDILSubtarget *stm = mTM->getSubtargetImpl(); + switch (IntrID) { + default: + return true; + case AMDGPUIntrinsic::AMDIL_convert_f32_i32_rpi: + case AMDGPUIntrinsic::AMDIL_convert_f32_i32_flr: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_near: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_neg_inf: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_plus_inf: + return stm->calVersion() >= CAL_VERSION_SC_139; + }; +} diff --git a/lib/Target/AMDIL/AMDILIntrinsicInfo.h b/lib/Target/AMDIL/AMDILIntrinsicInfo.h new file mode 100644 index 00000000000..513c6f06e85 --- /dev/null +++ b/lib/Target/AMDIL/AMDILIntrinsicInfo.h @@ -0,0 +1,49 @@ +//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the AMDIL Implementation of the Intrinsic Info class. 
+// +//===-----------------------------------------------------------------------===// +#ifndef _AMDIL_INTRINSICS_H_ +#define _AMDIL_INTRINSICS_H_ + +#include "llvm/Intrinsics.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + class AMDILTargetMachine; + namespace AMDGPUIntrinsic { + enum ID { + last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1, +#define GET_INTRINSIC_ENUM_VALUES +#include "AMDILGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_AMDIL_intrinsics + }; + + } + + + class AMDILIntrinsicInfo : public TargetIntrinsicInfo { + AMDILTargetMachine *mTM; + public: + AMDILIntrinsicInfo(AMDILTargetMachine *tm); + std::string getName(unsigned int IntrId, Type **Tys = 0, + unsigned int numTys = 0) const; + unsigned int lookupName(const char *Name, unsigned int Len) const; + bool isOverloaded(unsigned int IID) const; + Function *getDeclaration(Module *M, unsigned int ID, + Type **Tys = 0, + unsigned int numTys = 0) const; + bool isValidIntrinsic(unsigned int) const; + }; // AMDILIntrinsicInfo +} + +#endif // _AMDIL_INTRINSICS_H_ + diff --git a/lib/Target/AMDIL/AMDILNIDevice.cpp b/lib/Target/AMDIL/AMDILNIDevice.cpp new file mode 100644 index 00000000000..8fda1c18ae5 --- /dev/null +++ b/lib/Target/AMDIL/AMDILNIDevice.cpp @@ -0,0 +1,71 @@ +//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDILNIDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +using namespace llvm; + +AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) +{ + std::string name = ST->getDeviceName(); + if (name == "caicos") { + mDeviceFlag = OCL_DEVICE_CAICOS; + } else if (name == "turks") { + mDeviceFlag = OCL_DEVICE_TURKS; + } else if (name == "cayman") { + mDeviceFlag = OCL_DEVICE_CAYMAN; + } else { + mDeviceFlag = OCL_DEVICE_BARTS; + } +} +AMDILNIDevice::~AMDILNIDevice() +{ +} + +size_t +AMDILNIDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDILNIDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD6XXX; +} + + +AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST) + : AMDILNIDevice(ST) +{ + setCaps(); +} + +AMDILCaymanDevice::~AMDILCaymanDevice() +{ +} + +void +AMDILCaymanDevice::setCaps() +{ + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mHWBits.set(AMDILDeviceInfo::DoubleOps); + mHWBits.set(AMDILDeviceInfo::FMA); + } + mHWBits.set(AMDILDeviceInfo::Signed24BitOps); + mSWBits.reset(AMDILDeviceInfo::Signed24BitOps); + mSWBits.set(AMDILDeviceInfo::ArenaSegment); +} + diff --git a/lib/Target/AMDIL/AMDILNIDevice.h b/lib/Target/AMDIL/AMDILNIDevice.h new file mode 100644 index 00000000000..556670abba1 --- /dev/null +++ b/lib/Target/AMDIL/AMDILNIDevice.h @@ -0,0 +1,59 @@ +//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. 
+// +//===---------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===---------------------------------------------------------------------===// +#ifndef _AMDILNIDEVICE_H_ +#define _AMDILNIDEVICE_H_ +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===---------------------------------------------------------------------===// +// NI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +// The AMDILNIDevice is the base class for all Northern Island series of +// cards. It is very similiar to the AMDILEvergreenDevice, with the major +// exception being differences in wavefront size and hardware capabilities. The +// NI devices are all 64 wide wavefronts and also add support for signed 24 bit +// integer operations + + class AMDILNIDevice : public AMDILEvergreenDevice { + public: + AMDILNIDevice(AMDILSubtarget*); + virtual ~AMDILNIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + protected: + }; // AMDILNIDevice + +// Just as the AMDILCypressDevice is the double capable version of the +// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of +// the AMDILNIDevice. The other major difference that is not as useful from +// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the +// NI family is a 5 wide. 
+ + class AMDILCaymanDevice: public AMDILNIDevice { + public: + AMDILCaymanDevice(AMDILSubtarget*); + virtual ~AMDILCaymanDevice(); + private: + virtual void setCaps(); + }; // AMDILCaymanDevice + + static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800; +} // namespace llvm +#endif // _AMDILNIDEVICE_H_ diff --git a/lib/Target/AMDIL/AMDILRegisterInfo.cpp b/lib/Target/AMDIL/AMDILRegisterInfo.cpp new file mode 100644 index 00000000000..5588233378c --- /dev/null +++ b/lib/Target/AMDIL/AMDILRegisterInfo.cpp @@ -0,0 +1,200 @@ +//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "AMDILRegisterInfo.h" +#include "AMDIL.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDILRegisterInfo::AMDILRegisterInfo(AMDILTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDILGenRegisterInfo(0), // RA??? + TM(tm), TII(tii) +{ + baseOffset = 0; + nextFuncOffset = 0; +} + +const uint16_t* +AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const +{ + static const uint16_t CalleeSavedRegs[] = { 0 }; + // TODO: Does IL need to actually have any callee saved regs? 
+ // I don't think we do since we can just use sequential registers + // Maybe this would be easier if every function call was inlined first + // and then there would be no callee issues to deal with + //TODO(getCalleeSavedRegs); + return CalleeSavedRegs; +} + +BitVector +AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const +{ + BitVector Reserved(getNumRegs()); + // We reserve the first getNumRegs() registers as they are the ones passed + // in live-in/live-out + // and therefor cannot be killed by the scheduler. This works around a bug + // discovered + // that was causing the linearscan register allocator to kill registers + // inside of the + // function that were also passed as LiveIn registers. + for (unsigned int x = 0, y = 256; x < y; ++x) { + Reserved.set(x); + } + return Reserved; +} + +BitVector +AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC = NULL) const +{ + BitVector Allocatable(getNumRegs()); + Allocatable.clear(); + return Allocatable; +} + +const TargetRegisterClass* const* +AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const +{ + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + // TODO: Keep in sync with getCalleeSavedRegs + //TODO(getCalleeSavedRegClasses); + return CalleeSavedRegClasses; +} +void +AMDILRegisterInfo::eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const +{ + MBB.erase(I); +} + +// For each frame index we find, we store the offset in the stack which is +// being pushed back into the global buffer. The offset into the stack where +// the value is stored is copied into a new register and the frame index is +// then replaced with that register. 
+void +AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const +{ + assert(SPAdj == 0 && "Unexpected"); + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned int y = MI.getNumOperands(); + for (unsigned int x = 0; x < y; ++x) { + if (!MI.getOperand(x).isFI()) { + continue; + } + bool def = isStoreInst(TM.getInstrInfo(), &MI); + int FrameIndex = MI.getOperand(x).getIndex(); + int64_t Offset = MFI->getObjectOffset(FrameIndex); + //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex); + // An optimization is to only use the offsets if the size + // is larger than 4, which means we are storing an array + // instead of just a pointer. If we are size 4 then we can + // just do register copies since we don't need to worry about + // indexing dynamically + MachineInstr *nMI = MF.CreateMachineInstr( + TII.get(AMDIL::LOADCONST_i32), MI.getDebugLoc()); + nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true)); + nMI->addOperand( + MachineOperand::CreateImm(Offset)); + MI.getParent()->insert(II, nMI); + nMI = MF.CreateMachineInstr( + TII.get(AMDIL::ADD_i32), MI.getDebugLoc()); + nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true)); + nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, false)); + nMI->addOperand(MachineOperand::CreateReg(AMDIL::FP, false)); + + MI.getParent()->insert(II, nMI); + if (MI.getOperand(x).isReg() == false) { + MI.getOperand(x).ChangeToRegister( + nMI->getOperand(0).getReg(), def); + } else { + MI.getOperand(x).setReg( + nMI->getOperand(0).getReg()); + } + } +} + +void +AMDILRegisterInfo::processFunctionBeforeFrameFinalized( + MachineFunction &MF) const +{ + //TODO(processFunctionBeforeFrameFinalized); + // Here we keep track of the amount of stack that the current function + // uses so + // that we can set the offset to the end of the stack and any other + // function call + // will not 
overwrite any stack variables. + // baseOffset = nextFuncOffset; + MachineFrameInfo *MFI = MF.getFrameInfo(); + + for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) { + int64_t size = MFI->getObjectSize(x); + if (!(size % 4) && size > 1) { + nextFuncOffset += size; + } else { + nextFuncOffset += 16; + } + } +} +unsigned int +AMDILRegisterInfo::getRARegister() const +{ + return AMDIL::RA; +} + +unsigned int +AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const +{ + return AMDIL::FP; +} + +unsigned int +AMDILRegisterInfo::getEHExceptionRegister() const +{ + assert(0 && "What is the exception register"); + return 0; +} + +unsigned int +AMDILRegisterInfo::getEHHandlerRegister() const +{ + assert(0 && "What is the exception handler register"); + return 0; +} + +int64_t +AMDILRegisterInfo::getStackSize() const +{ + return nextFuncOffset - baseOffset; +} + +#define GET_REGINFO_TARGET_DESC +#include "AMDILGenRegisterInfo.inc" + diff --git a/lib/Target/AMDIL/AMDILRegisterInfo.h b/lib/Target/AMDIL/AMDILRegisterInfo.h new file mode 100644 index 00000000000..5207cd8b466 --- /dev/null +++ b/lib/Target/AMDIL/AMDILRegisterInfo.h @@ -0,0 +1,91 @@ +//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDILREGISTERINFO_H_ +#define AMDILREGISTERINFO_H_ + +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "AMDILGenRegisterInfo.inc" +// See header file for explanation + +namespace llvm +{ + + class AMDILTargetMachine; + class TargetInstrInfo; + class Type; + + /// DWARFFlavour - Flavour of dwarf regnumbers + /// + namespace DWARFFlavour { + enum { + AMDIL_Generic = 0 + }; + } + + struct AMDILRegisterInfo : public AMDILGenRegisterInfo + { + AMDILTargetMachine &TM; + const TargetInstrInfo &TII; + + AMDILRegisterInfo(AMDILTargetMachine &tm, const TargetInstrInfo &tii); + /// Code Generation virtual methods... + const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector + getReservedRegs(const MachineFunction &MF) const; + BitVector + getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC) const; + + void + eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void + eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void + processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + // Debug information queries. + unsigned int + getRARegister() const; + + unsigned int + getFrameRegister(const MachineFunction &MF) const; + + // Exception handling queries. 
+ unsigned int + getEHExceptionRegister() const; + unsigned int + getEHHandlerRegister() const; + + int64_t + getStackSize() const; + private: + mutable int64_t baseOffset; + mutable int64_t nextFuncOffset; + }; + +} // end namespace llvm + +#endif // AMDILREGISTERINFO_H_ diff --git a/lib/Target/AMDIL/AMDILSIDevice.cpp b/lib/Target/AMDIL/AMDILSIDevice.cpp new file mode 100644 index 00000000000..ce560984ef9 --- /dev/null +++ b/lib/Target/AMDIL/AMDILSIDevice.cpp @@ -0,0 +1,49 @@ +//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSubtarget.h"
+ +using namespace llvm;
+
+AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+}
+AMDILSIDevice::~AMDILSIDevice()
+{
+}
+
+size_t
+AMDILSIDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDILSIDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDILSIDevice::getDataLayout() const
+{
+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/AMDIL/AMDILSIDevice.h b/lib/Target/AMDIL/AMDILSIDevice.h new file mode 100644 index 00000000000..69f35a0588d --- /dev/null +++ b/lib/Target/AMDIL/AMDILSIDevice.h @@ -0,0 +1,45 @@ +//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+ +namespace llvm {
+ class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILSIDevice is the base class for all Southern Islands series of
+// cards. It is very similar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities. The
+// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
+// integer operations
+
 // Device description for Southern Islands (SI) GPUs. Derives from the
 // Evergreen device and overrides only the queries whose answers differ:
 // LDS size, hardware generation id, and the target data-layout string.
 class AMDILSIDevice : public AMDILEvergreenDevice {
 public:
   AMDILSIDevice(AMDILSubtarget*);
   virtual ~AMDILSIDevice();
   // Maximum local-data-share size in bytes available on this device.
   virtual size_t getMaxLDSSize() const;
   // Hardware generation identifier (AMDILDeviceInfo::HD7XXX for SI).
   virtual uint32_t getGeneration() const;
   // LLVM data-layout string describing SI's type sizes and alignments.
   virtual std::string getDataLayout() const;
 protected:
 }; // AMDILSIDevice
+
+} // namespace llvm
+#endif // _AMDILSIDEVICE_H_
diff --git a/lib/Target/AMDIL/AMDILSubtarget.cpp b/lib/Target/AMDIL/AMDILSubtarget.cpp new file mode 100644 index 00000000000..11b6bbe0c01 --- /dev/null +++ b/lib/Target/AMDIL/AMDILSubtarget.cpp @@ -0,0 +1,177 @@ +//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file implements the AMD IL specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "AMDILSubtarget.h" +#include "AMDIL.h" +#include "AMDILDevices.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/SubtargetFeature.h" + +using namespace llvm; + +#define GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "AMDILGenSubtargetInfo.inc" + +AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ) +{ + memset(CapsOverride, 0, sizeof(*CapsOverride) + * AMDILDeviceInfo::MaxNumberCapabilities); + // Default card + std::string GPU = "rv770"; + GPU = CPU; + mIs64bit = false; + mVersion = 0; + SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features; + SplitString(FS, Features, ","); + mDefaultSize[0] = 64; + mDefaultSize[1] = 1; + mDefaultSize[2] = 1; + std::string newFeatures = ""; +#if defined(_DEBUG) || defined(DEBUG) + bool useTest = false; +#endif + for (size_t x = 0; x < Features.size(); ++x) { + if (Features[x].startswith("+mwgs")) { + SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes; + SplitString(Features[x], sizes, "-"); + size_t mDim = ::atoi(sizes[1].data()); + if (mDim > 3) { + mDim = 3; + } + for (size_t y = 0; y < 
mDim; ++y) { + mDefaultSize[y] = ::atoi(sizes[y+2].data()); + } +#if defined(_DEBUG) || defined(DEBUG) + } else if (!Features[x].compare("test")) { + useTest = true; +#endif + } else if (Features[x].startswith("+cal")) { + SmallVector<StringRef, DEFAULT_VEC_SLOTS> version; + SplitString(Features[x], version, "="); + mVersion = ::atoi(version[1].data()); + } else { + GPU = CPU; + if (x > 0) newFeatures += ','; + newFeatures += Features[x]; + } + } + // If we don't have a version then set it to + // -1 which enables everything. This is for + // offline devices. + if (!mVersion) { + mVersion = (uint32_t)-1; + } + for (int x = 0; x < 3; ++x) { + if (!mDefaultSize[x]) { + mDefaultSize[x] = 1; + } + } +#if defined(_DEBUG) || defined(DEBUG) + if (useTest) { + GPU = "kauai"; + } +#endif + ParseSubtargetFeatures(GPU, newFeatures); +#if defined(_DEBUG) || defined(DEBUG) + if (useTest) { + GPU = "test"; + } +#endif + mDevName = GPU; + mDevice = getDeviceFromName(mDevName, this, mIs64bit); +} +AMDILSubtarget::~AMDILSubtarget() +{ + delete mDevice; +} +bool +AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const +{ + assert(caps < AMDILDeviceInfo::MaxNumberCapabilities && + "Caps index is out of bounds!"); + return CapsOverride[caps]; +} +bool +AMDILSubtarget::is64bit() const +{ + return mIs64bit; +} +bool +AMDILSubtarget::isTargetELF() const +{ + return false; +} +size_t +AMDILSubtarget::getDefaultSize(uint32_t dim) const +{ + if (dim > 3) { + return 1; + } else { + return mDefaultSize[dim]; + } +} +uint32_t +AMDILSubtarget::calVersion() const +{ + return mVersion; +} + +AMDILGlobalManager* +AMDILSubtarget::getGlobalManager() const +{ + return mGM; +} +void +AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const +{ + mGM = gm; +} + +AMDILKernelManager* +AMDILSubtarget::getKernelManager() const +{ + return mKM; +} +void +AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const +{ + mKM = km; +} +std::string +AMDILSubtarget::getDataLayout() const +{ + if 
(!mDevice) { + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); + } + return mDevice->getDataLayout(); +} + +std::string +AMDILSubtarget::getDeviceName() const +{ + return mDevName; +} +const AMDILDevice * +AMDILSubtarget::device() const +{ + return mDevice; +} diff --git a/lib/Target/AMDIL/AMDILSubtarget.h b/lib/Target/AMDIL/AMDILSubtarget.h new file mode 100644 index 00000000000..a4b0e34ada7 --- /dev/null +++ b/lib/Target/AMDIL/AMDILSubtarget.h @@ -0,0 +1,75 @@ +//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file declares the AMDIL specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _AMDILSUBTARGET_H_ +#define _AMDILSUBTARGET_H_ + +#include "AMDILDevice.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#include <cstdlib> +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "AMDILGenSubtargetInfo.inc" + +#define MAX_CB_SIZE (1 << 16) +namespace llvm { + class Module; + class AMDILKernelManager; + class AMDILGlobalManager; + class AMDILDevice; + class AMDILSubtarget : public AMDILGenSubtargetInfo { + private: + bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities]; + mutable AMDILGlobalManager *mGM; + mutable AMDILKernelManager *mKM; + const AMDILDevice *mDevice; + size_t mDefaultSize[3]; + size_t mMinimumSize[3]; + std::string mDevName; + uint32_t mVersion; + bool mIs64bit; + bool mIs32on64bit; + public: + AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS); + virtual ~AMDILSubtarget(); + bool isOverride(AMDILDeviceInfo::Caps) const; + bool is64bit() const; + + // Helper functions to simplify if statements + bool isTargetELF() const; + AMDILGlobalManager* getGlobalManager() const; + void setGlobalManager(AMDILGlobalManager *gm) const; + AMDILKernelManager* getKernelManager() const; + void setKernelManager(AMDILKernelManager *gm) const; + const AMDILDevice* device() const; + std::string getDataLayout() const; + std::string getDeviceName() const; + virtual size_t getDefaultSize(uint32_t dim) const; + // Return the version of CAL that the backend should target. + uint32_t calVersion() const; + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is + //auto generated by tblgen. 
+ void + ParseSubtargetFeatures( + llvm::StringRef CPU, + llvm::StringRef FS); + + }; + +} // end namespace llvm + +#endif // AMDILSUBTARGET_H_ diff --git a/lib/Target/AMDIL/AMDILTargetMachine.cpp b/lib/Target/AMDIL/AMDILTargetMachine.cpp new file mode 100644 index 00000000000..77fac1d97bd --- /dev/null +++ b/lib/Target/AMDIL/AMDILTargetMachine.cpp @@ -0,0 +1,182 @@ +//===-- AMDILTargetMachine.cpp - Define TargetMachine for AMDIL -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "AMDILTargetMachine.h" +#include "AMDGPUTargetMachine.h" +#include "AMDILDevices.h" +#include "AMDILFrameLowering.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +extern "C" void LLVMInitializeAMDILTarget() { + // Register the target + RegisterTargetMachine<AMDILTargetMachine> X(TheAMDILTarget); + RegisterTargetMachine<AMDGPUTargetMachine> Y(TheAMDGPUTarget); +} + +/// AMDILTargetMachine ctor - +/// +AMDILTargetMachine::AMDILTargetMachine(const Target &T, + StringRef TT, StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL +) +: + LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + 
DataLayout(Subtarget.getDataLayout()), + FrameLowering(TargetFrameLowering::StackGrowsUp, + Subtarget.device()->getStackAlignment(), 0), + InstrInfo(*this), //JITInfo(*this), + TLInfo(*this), + IntrinsicInfo(this) +{ + setAsmVerbosityDefault(true); + setMCUseLoc(false); +} + +AMDILTargetLowering* +AMDILTargetMachine::getTargetLowering() const +{ + return const_cast<AMDILTargetLowering*>(&TLInfo); +} + +const AMDILInstrInfo* +AMDILTargetMachine::getInstrInfo() const +{ + return &InstrInfo; +} +const AMDILFrameLowering* +AMDILTargetMachine::getFrameLowering() const +{ + return &FrameLowering; +} + +const AMDILSubtarget* +AMDILTargetMachine::getSubtargetImpl() const +{ + return &Subtarget; +} + +const AMDILRegisterInfo* +AMDILTargetMachine::getRegisterInfo() const +{ + return &InstrInfo.getRegisterInfo(); +} + +const TargetData* +AMDILTargetMachine::getTargetData() const +{ + return &DataLayout; +} + +const AMDILIntrinsicInfo* +AMDILTargetMachine::getIntrinsicInfo() const +{ + return &IntrinsicInfo; +} + + void +AMDILTargetMachine::dump(llvm::raw_ostream &O) +{ + if (!mDebugMode) { + return; + } + O << ";AMDIL Target Machine State Dump: \n"; +} + + void +AMDILTargetMachine::setDebug(bool debugMode) +{ + mDebugMode = debugMode; +} + +bool +AMDILTargetMachine::getDebug() const +{ + return mDebugMode; +} + +namespace { +class AMDILPassConfig : public TargetPassConfig { + +public: + AMDILPassConfig(AMDILTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + AMDILTargetMachine &getAMDILTargetMachine() const { + return getTM<AMDILTargetMachine>(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreEmitPass(); +}; +} // End of anonymous namespace + +TargetPassConfig *AMDILTargetMachine::createPassConfig(PassManagerBase &PM) { + return new AMDILPassConfig(this, PM); +} + +bool AMDILPassConfig::addPreISel() +{ + return false; +} + +bool 
AMDILPassConfig::addInstSelector() +{ + PM.add(createAMDILPeepholeOpt(*TM)); + PM.add(createAMDILISelDag(getAMDILTargetMachine())); + return false; +} + +bool AMDILPassConfig::addPreRegAlloc() +{ + // If debugging, reduce code motion. Use less aggressive pre-RA scheduler + if (TM->getOptLevel() == CodeGenOpt::None) { + llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler); + } + + PM.add(createAMDILMachinePeephole(*TM)); + return false; +} + +bool AMDILPassConfig::addPostRegAlloc() { + return false; // -print-machineinstr should print after this. +} + +/// addPreEmitPass - This pass may be implemented by targets that want to run +/// passes immediately before machine code is emitted. This should return +/// true if -print-machineinstrs should print out the code after the passes. +bool AMDILPassConfig::addPreEmitPass() +{ + PM.add(createAMDILCFGPreparationPass(*TM)); + PM.add(createAMDILCFGStructurizerPass(*TM)); + return true; +} + diff --git a/lib/Target/AMDIL/AMDILTargetMachine.h b/lib/Target/AMDIL/AMDILTargetMachine.h new file mode 100644 index 00000000000..0ff3674c59e --- /dev/null +++ b/lib/Target/AMDIL/AMDILTargetMachine.h @@ -0,0 +1,72 @@ +//===-- AMDILTargetMachine.h - Define TargetMachine for AMDIL ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file declares the AMDIL specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDILTARGETMACHINE_H_ +#define AMDILTARGETMACHINE_H_ + +#include "AMDILFrameLowering.h" +#include "AMDILISelLowering.h" +#include "AMDILInstrInfo.h" +#include "AMDILIntrinsicInfo.h" +#include "AMDILSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm +{ + class raw_ostream; + + class AMDILTargetMachine : public LLVMTargetMachine + { + private: + AMDILSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + AMDILFrameLowering FrameLowering; + AMDILInstrInfo InstrInfo; + AMDILTargetLowering TLInfo; + AMDILIntrinsicInfo IntrinsicInfo; + bool mDebugMode; + CodeGenOpt::Level mOptLevel; + + protected: + + public: + AMDILTargetMachine(const Target &T, + StringRef TT, StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + // Get Target/Subtarget specific information + virtual AMDILTargetLowering* getTargetLowering() const; + virtual const AMDILInstrInfo* getInstrInfo() const; + virtual const AMDILFrameLowering* getFrameLowering() const; + virtual const AMDILSubtarget* getSubtargetImpl() const; + virtual const AMDILRegisterInfo* getRegisterInfo() const; + virtual const TargetData* getTargetData() const; + virtual const AMDILIntrinsicInfo *getIntrinsicInfo() const; + + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + + void dump(llvm::raw_ostream &O); + void setDebug(bool debugMode); + bool getDebug() const; + CodeGenOpt::Level getOptLevel() const { return mOptLevel; } + + + }; // AMDILTargetMachine + +} // end namespace llvm + +#endif // AMDILTARGETMACHINE_H_ diff --git a/lib/Target/AMDIL/AMDILUtilityFunctions.cpp b/lib/Target/AMDIL/AMDILUtilityFunctions.cpp new file mode 100644 index 00000000000..f2ef4eb7771 --- /dev/null +++ b/lib/Target/AMDIL/AMDILUtilityFunctions.cpp @@ 
-0,0 +1,683 @@ +//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file provides the implementations of functions that are declared in the +// AMDILUtilityFUnctions.h file. +// +//===----------------------------------------------------------------------===// +#include "AMDILUtilityFunctions.h" +#include "AMDILISelLowering.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Type.h" + +#include <cstdio> +#include <list> +#include <queue> + +#define GET_OPCODE_NAME(TII, MI) \ + TII->getName(MI->getOpcode()) + + +using namespace llvm; +int64_t GET_SCALAR_SIZE(llvm::Type *A) { + return A->getScalarSizeInBits(); +} + +const TargetRegisterClass * getRegClassFromID(unsigned int ID) { + switch (ID) { + default: + assert(0 && "Passed in ID does not match any register classes."); + return NULL; + case AMDIL::GPRI8RegClassID: + return &AMDIL::GPRI8RegClass; + case AMDIL::GPRI16RegClassID: + return &AMDIL::GPRI16RegClass; + case AMDIL::GPRI32RegClassID: + return &AMDIL::GPRI32RegClass; + case AMDIL::GPRF32RegClassID: + return &AMDIL::GPRF32RegClass; + case AMDIL::GPRI64RegClassID: + return &AMDIL::GPRI64RegClass; + case AMDIL::GPRF64RegClassID: + return &AMDIL::GPRF64RegClass; + case AMDIL::GPRV4F32RegClassID: + return &AMDIL::GPRV4F32RegClass; + case AMDIL::GPRV4I8RegClassID: + return &AMDIL::GPRV4I8RegClass; + case AMDIL::GPRV4I16RegClassID: + return &AMDIL::GPRV4I16RegClass; + case AMDIL::GPRV4I32RegClassID: + return &AMDIL::GPRV4I32RegClass; + case 
AMDIL::GPRV2F32RegClassID: + return &AMDIL::GPRV2F32RegClass; + case AMDIL::GPRV2I8RegClassID: + return &AMDIL::GPRV2I8RegClass; + case AMDIL::GPRV2I16RegClassID: + return &AMDIL::GPRV2I16RegClass; + case AMDIL::GPRV2I32RegClassID: + return &AMDIL::GPRV2I32RegClass; + case AMDIL::GPRV2F64RegClassID: + return &AMDIL::GPRV2F64RegClass; + case AMDIL::GPRV2I64RegClassID: + return &AMDIL::GPRV2I64RegClass; + }; +} + +unsigned int getMoveInstFromID(unsigned int ID) { + switch (ID) { + default: + assert(0 && "Passed in ID does not match any move instructions."); + case AMDIL::GPRI8RegClassID: + return AMDIL::MOVE_i8; + case AMDIL::GPRI16RegClassID: + return AMDIL::MOVE_i16; + case AMDIL::GPRI32RegClassID: + return AMDIL::MOVE_i32; + case AMDIL::GPRF32RegClassID: + return AMDIL::MOVE_f32; + case AMDIL::GPRI64RegClassID: + return AMDIL::MOVE_i64; + case AMDIL::GPRF64RegClassID: + return AMDIL::MOVE_f64; + case AMDIL::GPRV4F32RegClassID: + return AMDIL::MOVE_v4f32; + case AMDIL::GPRV4I8RegClassID: + return AMDIL::MOVE_v4i8; + case AMDIL::GPRV4I16RegClassID: + return AMDIL::MOVE_v4i16; + case AMDIL::GPRV4I32RegClassID: + return AMDIL::MOVE_v4i32; + case AMDIL::GPRV2F32RegClassID: + return AMDIL::MOVE_v2f32; + case AMDIL::GPRV2I8RegClassID: + return AMDIL::MOVE_v2i8; + case AMDIL::GPRV2I16RegClassID: + return AMDIL::MOVE_v2i16; + case AMDIL::GPRV2I32RegClassID: + return AMDIL::MOVE_v2i32; + case AMDIL::GPRV2F64RegClassID: + return AMDIL::MOVE_v2f64; + case AMDIL::GPRV2I64RegClassID: + return AMDIL::MOVE_v2i64; + }; + return -1; +} + +unsigned int getPHIMoveInstFromID(unsigned int ID) { + switch (ID) { + default: + assert(0 && "Passed in ID does not match any move instructions."); + case AMDIL::GPRI8RegClassID: + return AMDIL::PHIMOVE_i8; + case AMDIL::GPRI16RegClassID: + return AMDIL::PHIMOVE_i16; + case AMDIL::GPRI32RegClassID: + return AMDIL::PHIMOVE_i32; + case AMDIL::GPRF32RegClassID: + return AMDIL::PHIMOVE_f32; + case AMDIL::GPRI64RegClassID: + return AMDIL::PHIMOVE_i64; 
+ case AMDIL::GPRF64RegClassID: + return AMDIL::PHIMOVE_f64; + case AMDIL::GPRV4F32RegClassID: + return AMDIL::PHIMOVE_v4f32; + case AMDIL::GPRV4I8RegClassID: + return AMDIL::PHIMOVE_v4i8; + case AMDIL::GPRV4I16RegClassID: + return AMDIL::PHIMOVE_v4i16; + case AMDIL::GPRV4I32RegClassID: + return AMDIL::PHIMOVE_v4i32; + case AMDIL::GPRV2F32RegClassID: + return AMDIL::PHIMOVE_v2f32; + case AMDIL::GPRV2I8RegClassID: + return AMDIL::PHIMOVE_v2i8; + case AMDIL::GPRV2I16RegClassID: + return AMDIL::PHIMOVE_v2i16; + case AMDIL::GPRV2I32RegClassID: + return AMDIL::PHIMOVE_v2i32; + case AMDIL::GPRV2F64RegClassID: + return AMDIL::PHIMOVE_v2f64; + case AMDIL::GPRV2I64RegClassID: + return AMDIL::PHIMOVE_v2i64; + }; + return -1; +} + +const TargetRegisterClass* getRegClassFromType(unsigned int type) { + switch (type) { + default: + assert(0 && "Passed in type does not match any register classes."); + case MVT::i8: + return &AMDIL::GPRI8RegClass; + case MVT::i16: + return &AMDIL::GPRI16RegClass; + case MVT::i32: + return &AMDIL::GPRI32RegClass; + case MVT::f32: + return &AMDIL::GPRF32RegClass; + case MVT::i64: + return &AMDIL::GPRI64RegClass; + case MVT::f64: + return &AMDIL::GPRF64RegClass; + case MVT::v4f32: + return &AMDIL::GPRV4F32RegClass; + case MVT::v4i8: + return &AMDIL::GPRV4I8RegClass; + case MVT::v4i16: + return &AMDIL::GPRV4I16RegClass; + case MVT::v4i32: + return &AMDIL::GPRV4I32RegClass; + case MVT::v2f32: + return &AMDIL::GPRV2F32RegClass; + case MVT::v2i8: + return &AMDIL::GPRV2I8RegClass; + case MVT::v2i16: + return &AMDIL::GPRV2I16RegClass; + case MVT::v2i32: + return &AMDIL::GPRV2I32RegClass; + case MVT::v2f64: + return &AMDIL::GPRV2F64RegClass; + case MVT::v2i64: + return &AMDIL::GPRV2I64RegClass; + } +} + +void printSDNode(const SDNode *N) { + printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n", + N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode()); + printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n", + N->use_empty(), N->hasOneUse(), 
(int)N->use_size(), N->getNodeId()); + for (unsigned int i = 0; i < N->getNumOperands(); ++i) { + printf("OperandNum: %d ValueCount: %d ValueType: %d\n", + i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy); + printSDValue(N->getOperand(i), 0); + } +} + +void printSDValue(const SDValue &Op, int level) { + printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(), + Op.getNumOperands()); + printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(), + Op.isMachineOpcode()); + if (Op.isMachineOpcode()) { + printf("MachineOpcode: %d\n", Op.getMachineOpcode()); + } else { + printf("\n"); + } + EVT vt = Op.getValueType(); + printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy); + printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse()); + if (level) { + printf("Children for %d:\n", level); + for (unsigned int i = 0; i < Op.getNumOperands(); ++i) { + printf("Child %d->%d:", level, i); + printSDValue(Op.getOperand(i), level - 1); + } + } +} + +bool isPHIMove(unsigned int opcode) { + switch (opcode) { + default: + return false; + ExpandCaseToAllTypes(AMDIL::PHIMOVE); + return true; + } + return false; +} + +bool isMove(unsigned int opcode) { + switch (opcode) { + default: + return false; + ExpandCaseToAllTypes(AMDIL::MOVE); + return true; + } + return false; +} + +bool isMoveOrEquivalent(unsigned int opcode) { + switch (opcode) { + default: + return isMove(opcode) || isPHIMove(opcode); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG); + 
ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT); + ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT); + case AMDIL::INTTOANY_i8: + case AMDIL::INTTOANY_i16: + case AMDIL::INTTOANY_i32: + case AMDIL::INTTOANY_f32: + case AMDIL::DLO: + case AMDIL::LLO: + case AMDIL::LLO_v2i64: + return true; + }; + return false; +} + +bool check_type(const Value *ptr, unsigned int addrspace) { + if (!ptr) { + return false; + } + Type *ptrType = ptr->getType(); + return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; +} + +size_t getTypeSize(Type * const T, bool dereferencePtr) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = T->getPrimitiveSizeInBits() >> 3; + break; + case Type::PointerTyID: + size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); + break; + case Type::IntegerTyID: + size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); + break; + case Type::StructTyID: + size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); + break; + case Type::ArrayTyID: + size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); + break; + case Type::FunctionTyID: + size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); + break; + case Type::VectorTyID: + size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); + break; + }; + return size; +} + +size_t getTypeSize(StructType * const ST, bool dereferencePtr) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { + curType = 
*eib; + size += getTypeSize(curType, dereferencePtr); + } + return size; +} + +size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) { + return IT ? (IT->getBitWidth() >> 3) : 0; +} + +size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) { + assert(0 && "Should not be able to calculate the size of an function type"); + return 0; +} + +size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) { + return (size_t)(AT ? (getTypeSize(AT->getElementType(), + dereferencePtr) * AT->getNumElements()) + : 0); +} + +size_t getTypeSize(VectorType * const VT, bool dereferencePtr) { + return VT ? (VT->getBitWidth() >> 3) : 0; +} + +size_t getTypeSize(PointerType * const PT, bool dereferencePtr) { + if (!PT) { + return 0; + } + Type *CT = PT->getElementType(); + if (CT->getTypeID() == Type::StructTyID && + PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + return getTypeSize(dyn_cast<StructType>(CT)); + } else if (dereferencePtr) { + size_t size = 0; + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getTypeSize(PT->getContainedType(x), dereferencePtr); + } + return size; + } else { + return 4; + } +} + +size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) { + //assert(0 && "Should not be able to calculate the size of an opaque type"); + return 4; +} + +size_t getNumElements(Type * const T) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = 1; + break; + case Type::PointerTyID: + size = getNumElements(dyn_cast<PointerType>(T)); + break; + case Type::IntegerTyID: + size = getNumElements(dyn_cast<IntegerType>(T)); + break; + case Type::StructTyID: + size = getNumElements(dyn_cast<StructType>(T)); + break; + case Type::ArrayTyID: + size = 
getNumElements(dyn_cast<ArrayType>(T)); + break; + case Type::FunctionTyID: + size = getNumElements(dyn_cast<FunctionType>(T)); + break; + case Type::VectorTyID: + size = getNumElements(dyn_cast<VectorType>(T)); + break; + }; + return size; +} + +size_t getNumElements(StructType * const ST) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); + eib != eie; ++eib) { + curType = *eib; + size += getNumElements(curType); + } + return size; +} + +size_t getNumElements(IntegerType * const IT) { + return (!IT) ? 0 : 1; +} + +size_t getNumElements(FunctionType * const FT) { + assert(0 && "Should not be able to calculate the number of " + "elements of a function type"); + return 0; +} + +size_t getNumElements(ArrayType * const AT) { + return (!AT) ? 0 + : (size_t)(getNumElements(AT->getElementType()) * + AT->getNumElements()); +} + +size_t getNumElements(VectorType * const VT) { + return (!VT) ? 
0 + : VT->getNumElements() * getNumElements(VT->getElementType()); +} + +size_t getNumElements(PointerType * const PT) { + size_t size = 0; + if (!PT) { + return size; + } + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getNumElements(PT->getContainedType(x)); + } + return size; +} + +const llvm::Value *getBasePointerValue(const llvm::Value *V) +{ + if (!V) { + return NULL; + } + const Value *ret = NULL; + ValueMap<const Value *, bool> ValueBitMap; + std::queue<const Value *, std::list<const Value *> > ValueQueue; + ValueQueue.push(V); + while (!ValueQueue.empty()) { + V = ValueQueue.front(); + if (ValueBitMap.find(V) == ValueBitMap.end()) { + ValueBitMap[V] = true; + if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { + ret = V; + break; + } else if (dyn_cast<GlobalVariable>(V)) { + ret = V; + break; + } else if (dyn_cast<Constant>(V)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); + if (CE) { + ValueQueue.push(CE->getOperand(0)); + } + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + ret = AI; + break; + } else if (const Instruction *I = dyn_cast<Instruction>(V)) { + uint32_t numOps = I->getNumOperands(); + for (uint32_t x = 0; x < numOps; ++x) { + ValueQueue.push(I->getOperand(x)); + } + } else { + // assert(0 && "Found a Value that we didn't know how to handle!"); + } + } + ValueQueue.pop(); + } + return ret; +} + +const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) { + const Value *moVal = NULL; + if (!MI->memoperands_empty()) { + const MachineMemOperand *memOp = (*MI->memoperands_begin()); + moVal = memOp ? 
memOp->getValue() : NULL; + moVal = getBasePointerValue(moVal); + } + return moVal; +} + +bool commaPrint(int i, llvm::raw_ostream &O) { + O << ":" << i; + return false; +} + +bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) { + return false; + } + return strstr(GET_OPCODE_NAME(TII, MI), "LOAD"); +} + +bool isSWSExtLoadInst(MachineInstr *MI) +{ +switch (MI->getOpcode()) { + default: + break; + ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD); + ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD); + ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD); + ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD); + ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD); + ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD); + return true; + }; + return false; +} + +bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD"); +} + +bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD"); +} + +bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD"); +} + +bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD"); +} + +bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "STORE"); +} + +bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE"); +} + +bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + return strstr(GET_OPCODE_NAME(TII, MI), "ATOM"); +} + +bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { + if (!MI->memoperands_empty()) { + for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), + moe = MI->memoperands_end(); mob != moe; ++mob) { + // If there is a 
volatile mem operand, this is a volatile instruction. + if ((*mob)->isVolatile()) { + return true; + } + } + } + return false; +} +bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL"); +} +bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE"); +} +bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT") + || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL"); +} +bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "REGION"); +} +bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL"); +} +bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE"); +} +bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "APPEND"); +} +bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R"); +} +bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L"); +} +bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G") + || isArenaAtomic(TII, MI); +} +bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) +{ + return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A"); +} + +const char* getSrcSwizzle(unsigned idx) { + const char *srcSwizzles[] = { + "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y", + ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w", + ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", 
".0xyz", ".xyzw", ".0000", + ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw", + ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)", + "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy", + ".zw" + }; + assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0]) + && "Idx passed in is invalid!"); + return srcSwizzles[idx]; +} +const char* getDstSwizzle(unsigned idx) { + const char *dstSwizzles[] = { + "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_", + ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w", + }; + assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0]) + && "Idx passed in is invalid!"); + return dstSwizzles[idx]; +} +/// Helper function to get the currently set flags +void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) +{ + // We need 16 bits of information, but LLVMr127097 cut the field in half. + // So we have to use two different fields to store all of our information. + uint16_t upper = MI->getFlags() << 8; + uint16_t lower = MI->getAsmPrinterFlags(); + curRes.u16all = upper | lower; +} +/// Helper function to clear the currently set flags and add the new flags. +void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) +{ + // We need 16 bits of information, but LLVMr127097 cut the field in half. + // So we have to use two different fields to store all of our information. 
+ MI->clearAsmPrinterFlags(); + MI->setFlags(0); + uint8_t lower = curRes.u16all & 0xFF; + uint8_t upper = (curRes.u16all >> 8) & 0xFF; + MI->setFlags(upper); + MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower); +} diff --git a/lib/Target/AMDIL/AMDILUtilityFunctions.h b/lib/Target/AMDIL/AMDILUtilityFunctions.h new file mode 100644 index 00000000000..637c868b55c --- /dev/null +++ b/lib/Target/AMDIL/AMDILUtilityFunctions.h @@ -0,0 +1,362 @@ +//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file provides declarations for functions that are used across different +// classes and provide various conversions or utility to shorten the code +// +//===----------------------------------------------------------------------===// +#ifndef AMDILUTILITYFUNCTIONS_H_ +#define AMDILUTILITYFUNCTIONS_H_ + +#include "AMDIL.h" +#include "AMDILTargetMachine.h" +#include "llvm/ADT/SmallVector.h" + +// Utility functions from ID +// +namespace llvm { +class TargetRegisterClass; +class SDValue; +class SDNode; +class Value; +class Type; +class StructType; +class IntegerType; +class FunctionType; +class VectorType; +class ArrayType; +class PointerType; +class OpaqueType; +class MachineInstr; + +} +enum SrcSwizzles { + AMDIL_SRC_SWIZZLE_DEFAULT = 0, + AMDIL_SRC_SWIZZLE_X000, + AMDIL_SRC_SWIZZLE_0X00, + AMDIL_SRC_SWIZZLE_00X0, + AMDIL_SRC_SWIZZLE_000X, + AMDIL_SRC_SWIZZLE_Y000, + AMDIL_SRC_SWIZZLE_0Y00, + AMDIL_SRC_SWIZZLE_00Y0, + AMDIL_SRC_SWIZZLE_000Y, + AMDIL_SRC_SWIZZLE_Z000, + AMDIL_SRC_SWIZZLE_0Z00, + AMDIL_SRC_SWIZZLE_00Z0, + AMDIL_SRC_SWIZZLE_000Z, + AMDIL_SRC_SWIZZLE_W000, + AMDIL_SRC_SWIZZLE_0W00, + AMDIL_SRC_SWIZZLE_00W0, + AMDIL_SRC_SWIZZLE_000W, + AMDIL_SRC_SWIZZLE_XY00, + 
AMDIL_SRC_SWIZZLE_00XY, + AMDIL_SRC_SWIZZLE_ZW00, + AMDIL_SRC_SWIZZLE_00ZW, + AMDIL_SRC_SWIZZLE_XYZ0, + AMDIL_SRC_SWIZZLE_0XYZ, + AMDIL_SRC_SWIZZLE_XYZW, + AMDIL_SRC_SWIZZLE_0000, + AMDIL_SRC_SWIZZLE_XXXX, + AMDIL_SRC_SWIZZLE_YYYY, + AMDIL_SRC_SWIZZLE_ZZZZ, + AMDIL_SRC_SWIZZLE_WWWW, + AMDIL_SRC_SWIZZLE_XYXY, + AMDIL_SRC_SWIZZLE_ZWZW, + AMDIL_SRC_SWIZZLE_XZXZ, + AMDIL_SRC_SWIZZLE_YWYW, + AMDIL_SRC_SWIZZLE_X0Y0, + AMDIL_SRC_SWIZZLE_0X0Y, + AMDIL_SRC_SWIZZLE_XY_NEGY, + AMDIL_SRC_SWIZZLE_NEGYW, + AMDIL_SRC_SWIZZLE_NEGX, + AMDIL_SRC_SWIZZLE_XY_NEGXY, + AMDIL_SRC_SWIZZLE_NEG_XYZW, + AMDIL_SRC_SWIZZLE_0YZW, + AMDIL_SRC_SWIZZLE_X0ZW, + AMDIL_SRC_SWIZZLE_XY0W, + AMDIL_SRC_SWIZZLE_X, + AMDIL_SRC_SWIZZLE_Y, + AMDIL_SRC_SWIZZLE_Z, + AMDIL_SRC_SWIZZLE_W, + AMDIL_SRC_SWIZZLE_XY, + AMDIL_SRC_SWIZZLE_ZW, + AMDIL_SRC_SWIZZLE_LAST +}; +enum DstSwizzles { + AMDIL_DST_SWIZZLE_DEFAULT = 0, + AMDIL_DST_SWIZZLE_X___, + AMDIL_DST_SWIZZLE_XY__, + AMDIL_DST_SWIZZLE_XYZ_, + AMDIL_DST_SWIZZLE_XYZW, + AMDIL_DST_SWIZZLE__Y__, + AMDIL_DST_SWIZZLE__YZ_, + AMDIL_DST_SWIZZLE__YZW, + AMDIL_DST_SWIZZLE___Z_, + AMDIL_DST_SWIZZLE___ZW, + AMDIL_DST_SWIZZLE____W, + AMDIL_DST_SWIZZLE_X_ZW, + AMDIL_DST_SWIZZLE_XY_W, + AMDIL_DST_SWIZZLE_X_Z_, + AMDIL_DST_SWIZZLE_X__W, + AMDIL_DST_SWIZZLE__Y_W, + AMDIL_DST_SWIZZLE_LAST +}; +// Function to get the correct src swizzle string from ID +const char *getSrcSwizzle(unsigned); + +// Function to get the correct dst swizzle string from ID +const char *getDstSwizzle(unsigned); + +const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID); + +unsigned int getMoveInstFromID(unsigned int ID); +unsigned int getPHIMoveInstFromID(unsigned int ID); + +// Utility functions from Type. +const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type); +unsigned int getTargetIndependentMoveFromType(unsigned int type); + +// Debug functions for SDNode and SDValue. 
+void printSDValue(const llvm::SDValue &Op, int level); +void printSDNode(const llvm::SDNode *N); + +// Functions to check if an opcode is a specific type. +bool isMove(unsigned int opcode); +bool isPHIMove(unsigned int opcode); +bool isMoveOrEquivalent(unsigned int opcode); + +// Function to check address space +bool check_type(const llvm::Value *ptr, unsigned int addrspace); + +// Group of functions that recursively calculate the size of a structure based +// on it's sub-types. +size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false); +size_t +getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false); +size_t +getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false); +size_t +getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false); +size_t +getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false); +size_t +getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false); +size_t +getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false); +size_t +getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false); + +// Group of functions that recursively calculate the number of elements of a +// structure based on it's sub-types. 
+size_t getNumElements(llvm::Type * const T); +size_t getNumElements(llvm::StructType * const ST); +size_t getNumElements(llvm::IntegerType * const IT); +size_t getNumElements(llvm::FunctionType * const FT); +size_t getNumElements(llvm::ArrayType * const AT); +size_t getNumElements(llvm::VectorType * const VT); +size_t getNumElements(llvm::PointerType * const PT); +size_t getNumElements(llvm::OpaqueType * const OT); +const llvm::Value *getBasePointerValue(const llvm::Value *V); +const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI); + + +int64_t GET_SCALAR_SIZE(llvm::Type* A); + +// Helper functions that check the opcode for status information +bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isSWSExtLoadInst(llvm::MachineInstr *MI); +bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool 
isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); +bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); + + +// Macros that are used to help with switch statements for various data types +// However, these macro's do not return anything unlike the second set below. +#define ExpandCaseTo32bitIntTypes(Instr) \ +case Instr##_i8: \ +case Instr##_i16: \ +case Instr##_i32: + +#define ExpandCaseTo32bitIntTruncTypes(Instr) \ +case Instr##_i16i8: \ +case Instr##_i32i8: \ +case Instr##_i32i16: + +#define ExpandCaseToIntTypes(Instr) \ + ExpandCaseTo32bitIntTypes(Instr) \ +case Instr##_i64: + +#define ExpandCaseToIntTruncTypes(Instr) \ + ExpandCaseTo32bitIntTruncTypes(Instr) \ +case Instr##_i64i8:\ +case Instr##_i64i16:\ +case Instr##_i64i32:\ + +#define ExpandCaseToFloatTypes(Instr) \ + case Instr##_f32: \ +case Instr##_f64: + +#define ExpandCaseToFloatTruncTypes(Instr) \ +case Instr##_f64f32: + +#define ExpandCaseTo32bitScalarTypes(Instr) \ + ExpandCaseTo32bitIntTypes(Instr) \ +case Instr##_f32: + +#define ExpandCaseToAllScalarTypes(Instr) \ + ExpandCaseToFloatTypes(Instr) \ +ExpandCaseToIntTypes(Instr) + +#define ExpandCaseToAllScalarTruncTypes(Instr) \ + ExpandCaseToFloatTruncTypes(Instr) \ +ExpandCaseToIntTruncTypes(Instr) + +// Vector versions of above macros +#define ExpandCaseToVectorIntTypes(Instr) \ + case Instr##_v2i8: \ +case Instr##_v4i8: \ +case Instr##_v2i16: \ +case Instr##_v4i16: \ +case Instr##_v2i32: \ +case Instr##_v4i32: \ +case Instr##_v2i64: + +#define ExpandCaseToVectorIntTruncTypes(Instr) \ +case Instr##_v2i16i8: \ +case Instr##_v4i16i8: \ +case Instr##_v2i32i8: \ +case Instr##_v4i32i8: \ +case Instr##_v2i32i16: \ +case Instr##_v4i32i16: \ +case Instr##_v2i64i8: \ +case Instr##_v2i64i16: \ +case Instr##_v2i64i32: + +#define 
ExpandCaseToVectorFloatTypes(Instr) \ + case Instr##_v2f32: \ +case Instr##_v4f32: \ +case Instr##_v2f64: + +#define ExpandCaseToVectorFloatTruncTypes(Instr) \ +case Instr##_v2f64f32: + +#define ExpandCaseToVectorByteTypes(Instr) \ + case Instr##_v4i8:\ +case Instr##_v2i16: \ +case Instr##_v4i16: + +#define ExpandCaseToAllVectorTypes(Instr) \ + ExpandCaseToVectorFloatTypes(Instr) \ +ExpandCaseToVectorIntTypes(Instr) + +#define ExpandCaseToAllVectorTruncTypes(Instr) \ + ExpandCaseToVectorFloatTruncTypes(Instr) \ +ExpandCaseToVectorIntTruncTypes(Instr) + +#define ExpandCaseToAllTypes(Instr) \ + ExpandCaseToAllVectorTypes(Instr) \ +ExpandCaseToAllScalarTypes(Instr) + +#define ExpandCaseToAllTruncTypes(Instr) \ + ExpandCaseToAllVectorTruncTypes(Instr) \ +ExpandCaseToAllScalarTruncTypes(Instr) + +#define ExpandCaseToPackedTypes(Instr) \ + case Instr##_v2i8: \ + case Instr##_v4i8: \ + case Instr##_v2i16: \ + case Instr##_v4i16: + +#define ExpandCaseToByteShortTypes(Instr) \ + case Instr##_i8: \ + case Instr##_i16: \ + ExpandCaseToPackedTypes(Instr) + +// Macros that expand into case statements with return values +#define ExpandCaseTo32bitIntReturn(Instr, Return) \ +case Instr##_i8: return Return##_i8;\ +case Instr##_i16: return Return##_i16;\ +case Instr##_i32: return Return##_i32; + +#define ExpandCaseToIntReturn(Instr, Return) \ + ExpandCaseTo32bitIntReturn(Instr, Return) \ +case Instr##_i64: return Return##_i64; + +#define ExpandCaseToFloatReturn(Instr, Return) \ + case Instr##_f32: return Return##_f32;\ +case Instr##_f64: return Return##_f64; + +#define ExpandCaseToAllScalarReturn(Instr, Return) \ + ExpandCaseToFloatReturn(Instr, Return) \ +ExpandCaseToIntReturn(Instr, Return) + +// These macros expand to common groupings of RegClass ID's +#define ExpandCaseTo1CompRegID \ +case AMDIL::GPRI8RegClassID: \ +case AMDIL::GPRI16RegClassID: \ +case AMDIL::GPRI32RegClassID: \ +case AMDIL::GPRF32RegClassID: + +#define ExpandCaseTo2CompRegID \ + case AMDIL::GPRI64RegClassID: \ 
+case AMDIL::GPRF64RegClassID: \ +case AMDIL::GPRV2I8RegClassID: \ +case AMDIL::GPRV2I16RegClassID: \ +case AMDIL::GPRV2I32RegClassID: \ +case AMDIL::GPRV2F32RegClassID: + +// Macros that expand to case statements for specific bitlengths +#define ExpandCaseTo8BitType(Instr) \ + case Instr##_i8: + +#define ExpandCaseTo16BitType(Instr) \ + case Instr##_v2i8: \ +case Instr##_i16: + +#define ExpandCaseTo32BitType(Instr) \ + case Instr##_v4i8: \ +case Instr##_v2i16: \ +case Instr##_i32: \ +case Instr##_f32: + +#define ExpandCaseTo64BitType(Instr) \ + case Instr##_v4i16: \ +case Instr##_v2i32: \ +case Instr##_v2f32: \ +case Instr##_i64: \ +case Instr##_f64: + +#define ExpandCaseTo128BitType(Instr) \ + case Instr##_v4i32: \ +case Instr##_v4f32: \ +case Instr##_v2i64: \ +case Instr##_v2f64: + +bool commaPrint(int i, llvm::raw_ostream &O); +/// Helper function to get the currently get/set flags. +void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); +void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); + +#endif // AMDILUTILITYFUNCTIONS_H_ diff --git a/lib/Target/AMDIL/CMakeLists.txt b/lib/Target/AMDIL/CMakeLists.txt new file mode 100644 index 00000000000..dac9fe0be65 --- /dev/null +++ b/lib/Target/AMDIL/CMakeLists.txt @@ -0,0 +1,61 @@ +set(LLVM_TARGET_DEFINITIONS AMDIL.td) + +tablegen(LLVM AMDILGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AMDILGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AMDILGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AMDILGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AMDILGenCallingConv.inc -gen-callingconv) +tablegen(LLVM AMDILGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AMDILGenEDInfo.inc -gen-enhanced-disassembly-info) +tablegen(LLVM AMDILGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM AMDILGenCodeEmitter.inc -gen-emitter) +add_public_tablegen_target(AMDILCommonTableGen) + +add_llvm_target(AMDILCodeGen + AMDIL7XXDevice.cpp + AMDILCFGStructurizer.cpp + 
AMDILDevice.cpp + AMDILDeviceInfo.cpp + AMDILEvergreenDevice.cpp + AMDILFrameLowering.cpp + AMDILInstrInfo.cpp + AMDILIntrinsicInfo.cpp + AMDILISelDAGToDAG.cpp + AMDILISelLowering.cpp + AMDILMachinePeephole.cpp + AMDILMCCodeEmitter.cpp + AMDILNIDevice.cpp + AMDILPeepholeOptimizer.cpp + AMDILRegisterInfo.cpp + AMDILSIDevice.cpp + AMDILSubtarget.cpp + AMDILTargetMachine.cpp + AMDILUtilityFunctions.cpp + AMDGPUTargetMachine.cpp + AMDGPUISelLowering.cpp + AMDGPUConvertToISA.cpp + AMDGPULowerInstructions.cpp + AMDGPULowerShaderInstructions.cpp + AMDGPUReorderPreloadInstructions.cpp + AMDGPUInstrInfo.cpp + AMDGPURegisterInfo.cpp + AMDGPUUtil.cpp + R600CodeEmitter.cpp + R600InstrInfo.cpp + R600ISelLowering.cpp + R600KernelParameters.cpp + R600LowerInstructions.cpp + R600LowerShaderInstructions.cpp + R600RegisterInfo.cpp + SIAssignInterpRegs.cpp + SICodeEmitter.cpp + SIInstrInfo.cpp + SIISelLowering.cpp + SILowerShaderInstructions.cpp + SIMachineFunctionInfo.cpp + SIPropagateImmReads.cpp + SIRegisterInfo.cpp + ) + +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/AMDIL/LLVMBuild.txt b/lib/Target/AMDIL/LLVMBuild.txt new file mode 100644 index 00000000000..ef39aa130e5 --- /dev/null +++ b/lib/Target/AMDIL/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = AMDIL +parent = Target +has_asmprinter = 0 + +[component_1] +type = Library +name = AMDILCodeGen +parent = AMDIL +required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDILInfo AMDILDesc +add_to_library_groups = AMDIL diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp new file mode 100644 index 00000000000..5b62311c6e6 --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp @@ -0,0 +1,107 @@ +//===-- MCTargetDesc/AMDILMCAsmInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDILMCAsmInfo.h" +#ifndef NULL +#define NULL 0 +#endif + +using namespace llvm; +AMDILMCAsmInfo::AMDILMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() +{ + //===------------------------------------------------------------------===// + HasSubsectionsViaSymbols = true; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + LinkerRequiresNonEmptyDwarfLines = true; + MaxInstLength = 16; + PCSymbol = "$"; + SeparatorString = "\n"; + CommentColumn = 40; + CommentString = ";"; + LabelSuffix = ":"; + GlobalPrefix = "@"; + PrivateGlobalPrefix = ";."; + LinkerPrivateGlobalPrefix = "!"; + InlineAsmStart = ";#ASMSTART"; + InlineAsmEnd = ";#ASMEND"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = false; + + //===--- Data Emission Directives -------------------------------------===// + ZeroDirective = ".zero"; + AsciiDirective = ".ascii\t"; + AscizDirective = ".asciz\t"; + Data8bitsDirective = ".byte\t"; + Data16bitsDirective = ".short\t"; + Data32bitsDirective = ".long\t"; + Data64bitsDirective = ".quad\t"; + GPRel32Directive = NULL; + SunStyleELFSectionSwitchSyntax = true; + UsesELFSectionDirectiveForBSS = true; + HasMicrosoftFastStdCallMangling = false; + + //===--- Alignment Information ----------------------------------------===// + AlignDirective = ".align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + + //===--- Global Variable Emission Directives --------------------------===// + GlobalDirective = ".global"; + ExternDirective = ".extern"; + HasSetDirective = false; + HasAggressiveSymbolFolding = true; + LCOMMDirectiveType = LCOMM::None; + COMMDirectiveAlignmentIsInBytes = false; + 
HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = true; + HasNoDeadStrip = true; + HasSymbolResolver = false; + WeakRefDirective = ".weakref\t"; + WeakDefDirective = ".weakdef\t"; + LinkOnceDirective = NULL; + HiddenVisibilityAttr = MCSA_Hidden; + HiddenDeclarationVisibilityAttr = MCSA_Hidden; + ProtectedVisibilityAttr = MCSA_Protected; + + //===--- Dwarf Emission Directives -----------------------------------===// + HasLEB128 = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::None; + DwarfUsesInlineInfoSection = false; + DwarfSectionOffsetDirective = ".offset"; + DwarfUsesLabelOffsetForRanges = true; + + //===--- CBE Asm Translation Table -----------------------------------===// + AsmTransCBE = NULL; +} +const char* +AMDILMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const +{ + switch (AS) { + default: + return NULL; + case 0: + return NULL; + }; + return NULL; +} + +const MCSection* +AMDILMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const +{ + return NULL; +} diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h new file mode 100644 index 00000000000..d354b03351c --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- MCTargetDesc/AMDILMCAsmInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDILMCASMINFO_H_ +#define AMDILMCASMINFO_H_ + +#include "llvm/MC/MCAsmInfo.h" +namespace llvm { + class Target; + class StringRef; + + class AMDILMCAsmInfo : public MCAsmInfo { + public: + explicit AMDILMCAsmInfo(const Target &T, StringRef &TT); + const char* + getDataASDirective(unsigned int Size, unsigned int AS) const; + const MCSection* getNonexecutableStackSection(MCContext &CTX) const; + }; +} // namespace llvm +#endif // AMDILMCASMINFO_H_ diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp new file mode 100644 index 00000000000..5e60b00bf53 --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp @@ -0,0 +1,66 @@ +#include "AMDILMCTargetDesc.h" +#include "AMDILMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "AMDILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AMDILGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AMDILGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createAMDILMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAMDILMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAMDILMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAMDILMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createAMDILMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo * X = new MCSubtargetInfo(); + InitAMDILMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static 
MCCodeGenInfo *createAMDILMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +extern "C" void LLVMInitializeAMDILTargetMC() { + + RegisterMCAsmInfo<AMDILMCAsmInfo> X(TheAMDILTarget); + RegisterMCAsmInfo<AMDILMCAsmInfo> Y(TheAMDGPUTarget); + + TargetRegistry::RegisterMCCodeGenInfo(TheAMDILTarget, createAMDILMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDILMCCodeGenInfo); + + TargetRegistry::RegisterMCInstrInfo(TheAMDILTarget, createAMDILMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDILMCInstrInfo); + + TargetRegistry::RegisterMCRegInfo(TheAMDILTarget, createAMDILMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDILMCRegisterInfo); + + TargetRegistry::RegisterMCSubtargetInfo(TheAMDILTarget, createAMDILMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDILMCSubtargetInfo); + +} diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h new file mode 100644 index 00000000000..370769fea25 --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h @@ -0,0 +1,36 @@ +//===-- AMDILMCTargetDesc.h - AMDIL Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AMDIL specific target descriptions. 
+// +//===----------------------------------------------------------------------===// +// + +#ifndef AMDILMCTARGETDESC_H +#define AMDILMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; + +extern Target TheAMDILTarget; +extern Target TheAMDGPUTarget; + +} // End llvm namespace + +#define GET_REGINFO_ENUM +#include "AMDILGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#include "AMDILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AMDILGenSubtargetInfo.inc" + +#endif // AMDILMCTARGETDESC_H diff --git a/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt new file mode 100644 index 00000000000..39c9b1ec61b --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_llvm_library(LLVMAMDILDesc + AMDILMCTargetDesc.cpp + AMDILMCAsmInfo.cpp + ) + +add_dependencies(LLVMAMDILDesc AMDILCommonTableGen) diff --git a/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 00000000000..828009e1a58 --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AMDILDesc +parent = AMDIL +required_libraries = AMDILInfo MC +add_to_library_groups = AMDIL diff --git a/lib/Target/AMDIL/MCTargetDesc/Makefile b/lib/Target/AMDIL/MCTargetDesc/Makefile new file mode 100644 index 00000000000..eb61a5d7b4d --- /dev/null +++ b/lib/Target/AMDIL/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AMDIL/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAMDILDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AMDIL/Makefile b/lib/Target/AMDIL/Makefile new file mode 100644 index 00000000000..fc49d096ad4 --- /dev/null +++ b/lib/Target/AMDIL/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/AMDIL/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAMDILCodeGen +TARGET = AMDIL + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AMDILGenRegisterInfo.inc AMDILGenInstrInfo.inc \ + AMDILGenAsmWriter.inc AMDILGenDAGISel.inc \ + AMDILGenSubtargetInfo.inc AMDILGenCodeEmitter.inc \ + AMDILGenCallingConv.inc AMDILGenEDInfo.inc \ + AMDILGenIntrinsics.inc + +DIRS = TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AMDIL/R600ISelLowering.cpp b/lib/Target/AMDIL/R600ISelLowering.cpp new file mode 100644 index 00000000000..94c183ec274 --- /dev/null +++ b/lib/Target/AMDIL/R600ISelLowering.cpp @@ -0,0 +1,116 @@ +//===-- R600ISelLowering.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +R600TargetLowering::R600TargetLowering(TargetMachine &TM) : + AMDGPUTargetLowering(TM), + TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) +{ + setOperationAction(ISD::MUL, MVT::i64, Expand); +// setSchedulingPreference(Sched::VLIW); + addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass); + addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); +} + +MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( + MachineInstr * MI, MachineBasicBlock * BB) const +{ + MachineFunction * MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + switch (MI->getOpcode()) { + default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + /* XXX: Use helper function from AMDGPULowerShaderInstructions here */ + 
case AMDIL::TGID_X: + addLiveIn(MI, MF, MRI, AMDIL::T1_X); + break; + case AMDIL::TGID_Y: + addLiveIn(MI, MF, MRI, AMDIL::T1_Y); + break; + case AMDIL::TGID_Z: + addLiveIn(MI, MF, MRI, AMDIL::T1_Z); + break; + case AMDIL::TIDIG_X: + addLiveIn(MI, MF, MRI, AMDIL::T0_X); + break; + case AMDIL::TIDIG_Y: + addLiveIn(MI, MF, MRI, AMDIL::T0_Y); + break; + case AMDIL::TIDIG_Z: + addLiveIn(MI, MF, MRI, AMDIL::T0_Z); + break; + case AMDIL::NGROUPS_X: + lowerImplicitParameter(MI, *BB, MRI, 0); + break; + case AMDIL::NGROUPS_Y: + lowerImplicitParameter(MI, *BB, MRI, 1); + break; + case AMDIL::NGROUPS_Z: + lowerImplicitParameter(MI, *BB, MRI, 2); + break; + case AMDIL::GLOBAL_SIZE_X: + lowerImplicitParameter(MI, *BB, MRI, 3); + break; + case AMDIL::GLOBAL_SIZE_Y: + lowerImplicitParameter(MI, *BB, MRI, 4); + break; + case AMDIL::GLOBAL_SIZE_Z: + lowerImplicitParameter(MI, *BB, MRI, 5); + break; + case AMDIL::LOCAL_SIZE_X: + lowerImplicitParameter(MI, *BB, MRI, 6); + break; + case AMDIL::LOCAL_SIZE_Y: + lowerImplicitParameter(MI, *BB, MRI, 7); + break; + case AMDIL::LOCAL_SIZE_Z: + lowerImplicitParameter(MI, *BB, MRI, 8); + break; + } + MI->eraseFromParent(); + return BB; +} + +void R600TargetLowering::addLiveIn(MachineInstr * MI, + MachineFunction * MF, MachineRegisterInfo & MRI, unsigned reg) const +{ + MRI.addLiveIn(reg, MI->getOperand(0).getReg()); + BuildMI(MF->front(), MF->front().begin(), DebugLoc(), TII->get(TargetOpcode::COPY)) + .addOperand(MI->getOperand(0)) + .addReg(reg); +} + +void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, + MachineRegisterInfo & MRI, unsigned dword_offset) const +{ + MachineBasicBlock::iterator I = *MI; + unsigned offsetReg = MRI.createVirtualRegister(&AMDIL::R600_TReg32_XRegClass); + MRI.setRegClass(MI->getOperand(0).getReg(), &AMDIL::R600_TReg32_XRegClass); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::MOV), offsetReg) + .addReg(AMDIL::ALU_LITERAL_X) + .addImm(dword_offset * 4); + + BuildMI(BB, 
I, BB.findDebugLoc(I), TII->get(AMDIL::VTX_READ_eg)) + .addOperand(MI->getOperand(0)) + .addReg(offsetReg) + .addImm(0); +} diff --git a/lib/Target/AMDIL/R600ISelLowering.h b/lib/Target/AMDIL/R600ISelLowering.h new file mode 100644 index 00000000000..ed59a00e7e2 --- /dev/null +++ b/lib/Target/AMDIL/R600ISelLowering.h @@ -0,0 +1,43 @@ +//===-- R600ISelLowering.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef R600ISELLOWERING_H +#define R600ISELLOWERING_H + +#include "AMDGPUISelLowering.h" + +namespace llvm { + +class R600InstrInfo; + +class R600TargetLowering : public AMDGPUTargetLowering +{ +public: + R600TargetLowering(TargetMachine &TM); + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock * BB) const; + +private: + const R600InstrInfo * TII; + + void addLiveIn(MachineInstr * MI, MachineFunction * MF, + MachineRegisterInfo & MRI, unsigned reg) const; + + void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, + MachineRegisterInfo & MRI, unsigned dword_offset) const; + +}; + +} // End namespace llvm; + +#endif // R600ISELLOWERING_H diff --git a/lib/Target/AMDIL/R600InstrInfo.cpp b/lib/Target/AMDIL/R600InstrInfo.cpp new file mode 100644 index 00000000000..0c7ffc4334d --- /dev/null +++ b/lib/Target/AMDIL/R600InstrInfo.cpp @@ -0,0 +1,123 @@ +//===-- R600InstrInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "R600InstrInfo.h" +#include "AMDGPUTargetMachine.h" +#include "R600RegisterInfo.h" + +using namespace llvm; + +R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) + : AMDGPUInstrInfo(tm), + RI(tm, *this), + TM(tm) + { } + +const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const +{ + return RI; +} + +bool R600InstrInfo::isTrig(const MachineInstr &MI) const +{ + return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; +} + +void +R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const +{ + + unsigned subRegMap[4] = {AMDIL::sel_x, AMDIL::sel_y, AMDIL::sel_z, AMDIL::sel_w}; + + if (AMDIL::R600_Reg128RegClass.contains(DestReg) + && AMDIL::R600_Reg128RegClass.contains(SrcReg)) { + for (unsigned i = 0; i < 4; i++) { + BuildMI(MBB, MI, DL, get(AMDIL::MOV)) + .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define) + .addReg(RI.getSubReg(SrcReg, subRegMap[i])) + .addReg(DestReg, RegState::Define | RegState::Implicit); + } + } else { + + /* We can't copy vec4 registers */ + assert(!AMDIL::R600_Reg128RegClass.contains(DestReg) + && !AMDIL::R600_Reg128RegClass.contains(SrcReg)); + + BuildMI(MBB, MI, DL, get(AMDIL::MOV), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } +} + +unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const +{ + switch (opcode) { + default: return AMDGPUInstrInfo::getISAOpcode(opcode); + case AMDIL::CUSTOM_ADD_i32: + return AMDIL::ADD_INT; + case AMDIL::CUSTOM_XOR_i32: + return AMDIL::XOR_INT; + case AMDIL::MOVE_f32: + case AMDIL::MOVE_i32: + return AMDIL::MOV; + case AMDIL::SHR_i32: + return getLSHRop(); + } +} + +unsigned R600InstrInfo::getLSHRop() const +{ + unsigned gen = 
TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::LSHR_r600; + } else { + return AMDIL::LSHR_eg; + } +} + +unsigned R600InstrInfo::getMULHI_UINT() const +{ + unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::MULHI_UINT_r600; + } else { + return AMDIL::MULHI_UINT_eg; + } +} + +unsigned R600InstrInfo::getMULLO_UINT() const +{ + unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::MULLO_UINT_r600; + } else { + return AMDIL::MULLO_UINT_eg; + } +} + +unsigned R600InstrInfo::getRECIP_UINT() const +{ + const AMDILDevice * dev = TM.getSubtarget<AMDILSubtarget>().device(); + + if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX) { + return AMDIL::RECIP_UINT_r600; + } else if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) { + return AMDIL::RECIP_UINT_eg; + } else { + return AMDIL::RECIP_UINT_cm; + } +} diff --git a/lib/Target/AMDIL/R600InstrInfo.h b/lib/Target/AMDIL/R600InstrInfo.h new file mode 100644 index 00000000000..aedaa9f47f3 --- /dev/null +++ b/lib/Target/AMDIL/R600InstrInfo.h @@ -0,0 +1,74 @@ +//===-- R600InstrInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef R600INSTRUCTIONINFO_H_ +#define R600INSTRUCTIONINFO_H_ + +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "R600RegisterInfo.h" + +#include <map> + +namespace llvm { + + struct InstrGroup { + unsigned amdil; + unsigned r600; + unsigned eg; + unsigned cayman; + }; + + class AMDGPUTargetMachine; + class MachineFunction; + class MachineInstr; + class MachineInstrBuilder; + + class R600InstrInfo : public AMDGPUInstrInfo { + private: + const R600RegisterInfo RI; + AMDGPUTargetMachine &TM; + + public: + explicit R600InstrInfo(AMDGPUTargetMachine &tm); + + const R600RegisterInfo &getRegisterInfo() const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual unsigned getISAOpcode(unsigned opcode) const; + bool isTrig(const MachineInstr &MI) const; + + unsigned getLSHRop() const; + unsigned getMULHI_UINT() const; + unsigned getMULLO_UINT() const; + unsigned getRECIP_UINT() const; + + }; + +} // End llvm namespace + +namespace R600_InstFlag { + enum TIF { + TRANS_ONLY = (1 << 0), + TEX = (1 << 1), + REDUCTION = (1 << 2), + FC = (1 << 3), + TRIG = (1 << 4), + OP3 = (1 << 5) + }; +} + +#endif // R600INSTRINFO_H_ diff --git a/lib/Target/AMDIL/R600RegisterInfo.cpp b/lib/Target/AMDIL/R600RegisterInfo.cpp new file mode 100644 index 00000000000..96507b104cf --- /dev/null +++ b/lib/Target/AMDIL/R600RegisterInfo.cpp @@ -0,0 +1,102 @@ +//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "R600RegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +using namespace llvm; + +R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPURegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } + +BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const +{ + BitVector Reserved(getNumRegs()); + Reserved.set(AMDIL::ZERO); + Reserved.set(AMDIL::HALF); + Reserved.set(AMDIL::ONE); + Reserved.set(AMDIL::ONE_INT); + Reserved.set(AMDIL::NEG_HALF); + Reserved.set(AMDIL::NEG_ONE); + Reserved.set(AMDIL::PV_X); + Reserved.set(AMDIL::ALU_LITERAL_X); + + for (TargetRegisterClass::iterator I = AMDIL::R600_CReg32RegClass.begin(), + E = AMDIL::R600_CReg32RegClass.end(); I != E; ++I) { + Reserved.set(*I); + } + + for (MachineFunction::const_iterator BB = MF.begin(), + BB_E = MF.end(); BB != BB_E; ++BB) { + const MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + const MachineInstr &MI = *I; + if (MI.getOpcode() == AMDIL::RESERVE_REG) { + if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { + Reserved.set(MI.getOperand(0).getReg()); + } + } + } + } + return Reserved; +} + +const TargetRegisterClass * +R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const +{ + switch (rc->getID()) { + case AMDIL::GPRV4F32RegClassID: + case AMDIL::GPRV4I32RegClassID: + return &AMDIL::R600_Reg128RegClass; + case AMDIL::GPRF32RegClassID: + case AMDIL::GPRI32RegClassID: + return &AMDIL::R600_Reg32RegClass; + default: return rc; + } +} + +unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const +{ + switch(reg) { + case AMDIL::ZERO: return 248; + case AMDIL::ONE: + case AMDIL::NEG_ONE: return 249; + case AMDIL::ONE_INT: 
return 250; + case AMDIL::HALF: + case AMDIL::NEG_HALF: return 252; + case AMDIL::ALU_LITERAL_X: return 253; + default: return getHWRegIndexGen(reg); + } +} + +unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const +{ + switch(reg) { + case AMDIL::ZERO: + case AMDIL::ONE: + case AMDIL::ONE_INT: + case AMDIL::NEG_ONE: + case AMDIL::HALF: + case AMDIL::NEG_HALF: + case AMDIL::ALU_LITERAL_X: + return 0; + default: return getHWRegChanGen(reg); + } +} + +#include "R600HwRegInfo.include" diff --git a/lib/Target/AMDIL/R600RegisterInfo.h b/lib/Target/AMDIL/R600RegisterInfo.h new file mode 100644 index 00000000000..95a44f971a0 --- /dev/null +++ b/lib/Target/AMDIL/R600RegisterInfo.h @@ -0,0 +1,44 @@ +//===-- R600RegisterInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef R600REGISTERINFO_H_ +#define R600REGISTERINFO_H_ + +#include "AMDGPUTargetMachine.h" +#include "AMDILRegisterInfo.h" + +namespace llvm { + + class R600TargetMachine; + class TargetInstrInfo; + + struct R600RegisterInfo : public AMDGPURegisterInfo + { + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + virtual const TargetRegisterClass * + getISARegClass(const TargetRegisterClass * rc) const; + unsigned getHWRegIndex(unsigned reg) const; + unsigned getHWRegChan(unsigned reg) const; +private: + unsigned getHWRegChanGen(unsigned reg) const; + unsigned getHWRegIndexGen(unsigned reg) const; + }; +} // End namespace llvm + +#endif // AMDIDSAREGISTERINFO_H_ diff --git 
a/lib/Target/AMDIL/SIISelLowering.cpp b/lib/Target/AMDIL/SIISelLowering.cpp new file mode 100644 index 00000000000..1a4b47ecbf5 --- /dev/null +++ b/lib/Target/AMDIL/SIISelLowering.cpp @@ -0,0 +1,151 @@ +//===-- SIISelLowering.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +SITargetLowering::SITargetLowering(TargetMachine &TM) : + AMDGPUTargetLowering(TM), + TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) +{ + addRegisterClass(MVT::v4f32, &AMDIL::VReg_128RegClass); + addRegisterClass(MVT::f32, &AMDIL::VReg_32RegClass); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); +} + +MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( + MachineInstr * MI, MachineBasicBlock * BB) const +{ + const struct TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); + MachineBasicBlock::iterator I = MI; + + if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { + AppendS_WAITCNT(MI, *BB, llvm::next(I)); + } + + switch (MI->getOpcode()) { + default: + return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case AMDIL::SI_INTERP: + LowerSI_INTERP(MI, *BB, I, MRI); + break; + case AMDIL::SI_INTERP_CONST: + LowerSI_INTERP_CONST(MI, *BB, I); + break; + case AMDIL::SI_V_CNDLT: + LowerSI_V_CNDLT(MI, *BB, I, MRI); + break; + case AMDIL::USE_SGPR_32: + case AMDIL::USE_SGPR_64: + 
lowerUSE_SGPR(MI, BB->getParent(), MRI); + MI->eraseFromParent(); + break; + case AMDIL::VS_LOAD_BUFFER_INDEX: + addLiveIn(MI, BB->getParent(), MRI, TII, AMDIL::VGPR0); + MI->eraseFromParent(); + break; + } + return BB; +} + +void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I) const +{ + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_WAITCNT)) + .addImm(0); +} + +void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const +{ + unsigned tmp = MRI.createVirtualRegister(&AMDIL::VReg_32RegClass); + MachineOperand dst = MI->getOperand(0); + MachineOperand iReg = MI->getOperand(1); + MachineOperand jReg = MI->getOperand(2); + MachineOperand attr_chan = MI->getOperand(3); + MachineOperand attr = MI->getOperand(4); + MachineOperand params = MI->getOperand(5); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_MOV_B32)) + .addReg(AMDIL::M0) + .addOperand(params); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_P1_F32), tmp) + .addOperand(iReg) + .addOperand(attr_chan) + .addOperand(attr); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_P2_F32)) + .addOperand(dst) + .addReg(tmp) + .addOperand(jReg) + .addOperand(attr_chan) + .addOperand(attr); + + MI->eraseFromParent(); +} + +void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, + MachineBasicBlock &BB, MachineBasicBlock::iterator I) const +{ + MachineOperand dst = MI->getOperand(0); + MachineOperand attr_chan = MI->getOperand(1); + MachineOperand attr = MI->getOperand(2); + MachineOperand params = MI->getOperand(3); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_MOV_B32)) + .addReg(AMDIL::M0) + .addOperand(params); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_MOV_F32)) + .addOperand(dst) + .addOperand(attr_chan) + .addOperand(attr); + + MI->eraseFromParent(); +} + +void 
SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const +{ + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_CMP_LT_F32_e32)) + .addOperand(MI->getOperand(1)) + .addReg(AMDIL::SREG_LIT_0); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_CNDMASK_B32)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(3)); + + MI->eraseFromParent(); +} + +void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI, + MachineFunction * MF, MachineRegisterInfo & MRI) const +{ + const struct TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); + unsigned dstReg = MI->getOperand(0).getReg(); + int64_t newIndex = MI->getOperand(1).getImm(); + const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg); + + unsigned newReg = dstClass->getRegister(newIndex); + addLiveIn(MI, MF, MRI, TII, newReg); +} + diff --git a/lib/Target/AMDIL/SIISelLowering.h b/lib/Target/AMDIL/SIISelLowering.h new file mode 100644 index 00000000000..e7a79f8e215 --- /dev/null +++ b/lib/Target/AMDIL/SIISelLowering.h @@ -0,0 +1,44 @@ +//===-- SIISelLowering.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef SIISELLOWERING_H +#define SIISELLOWERING_H + +#include "AMDGPUISelLowering.h" +#include "SIInstrInfo.h" + +namespace llvm { + +class SITargetLowering : public AMDGPUTargetLowering +{ + const SIInstrInfo * TII; + + void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I) const; + void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I) const; + void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF, + MachineRegisterInfo & MRI) const; +public: + SITargetLowering(TargetMachine &tm); + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, + MachineBasicBlock * BB) const; +}; + +} // End namespace llvm + +#endif //SIISELLOWERING_H diff --git a/lib/Target/AMDIL/SIInstrInfo.cpp b/lib/Target/AMDIL/SIInstrInfo.cpp new file mode 100644 index 00000000000..f9eb59a9229 --- /dev/null +++ b/lib/Target/AMDIL/SIInstrInfo.cpp @@ -0,0 +1,135 @@ +//===-- SIInstrInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "SIInstrInfo.h" +#include "AMDGPUTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" + +#include <stdio.h> + +using namespace llvm; + +SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) + : AMDGPUInstrInfo(tm), + RI(tm, *this), + TM(tm) + { } + +const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const +{ + return RI; +} + +void +SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const +{ + BuildMI(MBB, MI, DL, get(AMDIL::V_MOV_B32_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const +{ + return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; +} + +unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const +{ + + /* Instructions with literal constants are expanded to 64-bits, and + * the constant is stored in bits [63:32] */ + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) { + return 8; + } + } + + /* This instruction always has a literal */ + if (MI.getOpcode() == AMDIL::S_MOV_IMM_I32) { + return 8; + } + + unsigned encoding_type = getEncodingType(MI); + switch (encoding_type) { + case SIInstrEncodingType::EXP: + case SIInstrEncodingType::LDS: + case SIInstrEncodingType::MUBUF: + case SIInstrEncodingType::MTBUF: + case SIInstrEncodingType::MIMG: + case SIInstrEncodingType::VOP3: + return 8; + default: + return 4; + } +} + +MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const +{ + + switch (MI.getOpcode()) { + default: break; + case AMDIL::ABS_f32: return convertABS_f32(MI, MF, DL); + 
} + + MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL); + const MCInstrDesc &newDesc = get(newMI->getOpcode()); + + /* If this instruction was converted to a VOP3, we need to add the extra + * operands for abs, clamp, omod, and negate. */ + if (getEncodingType(*newMI) == SIInstrEncodingType::VOP3 + && newMI->getNumOperands() < newDesc.getNumOperands()) { + MachineInstrBuilder builder(newMI); + for (unsigned op_idx = newMI->getNumOperands(); + op_idx < newDesc.getNumOperands(); op_idx++) { + builder.addImm(0); + } + } + return newMI; +} + +unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const +{ + switch (AMDILopcode) { + case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32; + default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode); + } +} + +MachineInstr * SIInstrInfo::convertABS_f32(MachineInstr & absInstr, + MachineFunction &MF, DebugLoc DL) const +{ + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineOperand &dst = absInstr.getOperand(0); + + /* Convert the desination register to the VReg_32 class */ + if (TargetRegisterInfo::isVirtualRegister(dst.getReg())) { + MRI.setRegClass(dst.getReg(), &AMDIL::VReg_32RegClass); + } + + return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64)) + .addOperand(absInstr.getOperand(0)) + .addOperand(absInstr.getOperand(1)) + /* VSRC1-2 are unused, but we still need to fill all the + * operand slots, so we just reuse the VSRC0 operand */ + .addOperand(absInstr.getOperand(1)) + .addOperand(absInstr.getOperand(1)) + .addImm(1) // ABS + .addImm(0) // CLAMP + .addImm(0) // OMOD + .addImm(0); // NEG +} diff --git a/lib/Target/AMDIL/SIInstrInfo.h b/lib/Target/AMDIL/SIInstrInfo.h new file mode 100644 index 00000000000..32baaa99e63 --- /dev/null +++ b/lib/Target/AMDIL/SIInstrInfo.h @@ -0,0 +1,92 @@ +//===-- SIInstrInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#ifndef SIINSTRINFO_H +#define SIINSTRINFO_H + +#include "AMDGPUInstrInfo.h" +#include "SIRegisterInfo.h" + +namespace llvm { + + class SIInstrInfo : public AMDGPUInstrInfo { + private: + const SIRegisterInfo RI; + AMDGPUTargetMachine &TM; + + MachineInstr * convertABS_f32(MachineInstr & absInstr, MachineFunction &MF, + DebugLoc DL) const; + + public: + explicit SIInstrInfo(AMDGPUTargetMachine &tm); + + const SIRegisterInfo &getRegisterInfo() const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + unsigned getEncodingType(const MachineInstr &MI) const; + + unsigned getEncodingBytes(const MachineInstr &MI) const; + + uint64_t getBinaryCode(const MachineInstr &MI, bool encodOpcode = false) const; + + virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const; + + virtual unsigned getISAOpcode(unsigned AMDILopcode) const; + + }; + +} // End namespace llvm + +/* These must be kept in sync with SIInstructions.td and also the + * InstrEncodingInfo array in SIInstrInfo.cpp. 
+ * + * NOTE: This enum is only used to identify the encoding type within LLVM, + * the actual encoding type that is part of the instruction format is different + */ +namespace SIInstrEncodingType { + enum Encoding { + EXP = 0, + LDS = 1, + MIMG = 2, + MTBUF = 3, + MUBUF = 4, + SMRD = 5, + SOP1 = 6, + SOP2 = 7, + SOPC = 8, + SOPK = 9, + SOPP = 10, + VINTRP = 11, + VOP1 = 12, + VOP2 = 13, + VOP3 = 14, + VOPC = 15 + }; +} + +#define SI_INSTR_FLAGS_ENCODING_MASK 0xf + +namespace SIInstrFlags { + enum Flags { + /* First 4 bits are the instruction encoding */ + NEED_WAIT = 1 << 4 + }; +} + +#endif //SIINSTRINFO_H diff --git a/lib/Target/AMDIL/SIMachineFunctionInfo.cpp b/lib/Target/AMDIL/SIMachineFunctionInfo.cpp new file mode 100644 index 00000000000..eace40c226c --- /dev/null +++ b/lib/Target/AMDIL/SIMachineFunctionInfo.cpp @@ -0,0 +1,22 @@ +//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "SIMachineFunctionInfo.h" + +using namespace llvm; + +SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo(), + spi_ps_input_addr(0) + { } diff --git a/lib/Target/AMDIL/SIMachineFunctionInfo.h b/lib/Target/AMDIL/SIMachineFunctionInfo.h new file mode 100644 index 00000000000..5647de9d81f --- /dev/null +++ b/lib/Target/AMDIL/SIMachineFunctionInfo.h @@ -0,0 +1,35 @@ +//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#ifndef _SIMACHINEFUNCTIONINFO_H_ +#define _SIMACHINEFUNCTIONINFO_H_ + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class SIMachineFunctionInfo : public MachineFunctionInfo { + + private: + + public: + SIMachineFunctionInfo(const MachineFunction &MF); + unsigned spi_ps_input_addr; + +}; + +} // End namespace llvm + + +#endif //_SIMACHINEFUNCTIONINFO_H_ diff --git a/lib/Target/AMDIL/SIRegisterInfo.cpp b/lib/Target/AMDIL/SIRegisterInfo.cpp new file mode 100644 index 00000000000..203ac9f5f28 --- /dev/null +++ b/lib/Target/AMDIL/SIRegisterInfo.cpp @@ -0,0 +1,66 @@ +//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#include "SIRegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "AMDGPUUtil.h" + +using namespace llvm; + +SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPURegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } + +BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const +{ + BitVector Reserved(getNumRegs()); + return Reserved; +} + +unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const +{ + switch (reg) { + case AMDIL::M0: return 124; + case AMDIL::SREG_LIT_0: return 128; + default: return getHWRegNum(reg); + } +} + +bool SIRegisterInfo::isBaseRegClass(unsigned regClassID) const +{ + switch (regClassID) { + default: return true; + case AMDIL::AllReg_32RegClassID: + case AMDIL::AllReg_64RegClassID: + return false; + } +} + +const TargetRegisterClass * +SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const +{ + switch (rc->getID()) { + case AMDIL::GPRF32RegClassID: + return &AMDIL::VReg_32RegClass; + case AMDIL::GPRV4F32RegClassID: + case AMDIL::GPRV4I32RegClassID: + return &AMDIL::VReg_128RegClass; + default: return rc; + } +} + +#include "SIRegisterGetHWRegNum.include" diff --git a/lib/Target/AMDIL/SIRegisterInfo.h b/lib/Target/AMDIL/SIRegisterInfo.h new file mode 100644 index 00000000000..c797e3c8ace --- /dev/null +++ b/lib/Target/AMDIL/SIRegisterInfo.h @@ -0,0 +1,46 @@ +//===-- SIRegisterInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + + +#ifndef SIREGISTERINFO_H_ +#define SIREGISTERINFO_H_ + +#include "AMDGPURegisterInfo.h" + +namespace llvm { + + class AMDGPUTargetMachine; + class TargetInstrInfo; + + struct SIRegisterInfo : public AMDGPURegisterInfo + { + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + virtual unsigned getBinaryCode(unsigned reg) const; + + virtual bool isBaseRegClass(unsigned regClassID) const; + + virtual const TargetRegisterClass * + getISARegClass(const TargetRegisterClass * rc) const; + + unsigned getHWRegNum(unsigned reg) const; + + }; + +} // End namespace llvm + +#endif // SIREGISTERINFO_H_ diff --git a/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp b/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp new file mode 100644 index 00000000000..5dee0cb7c05 --- /dev/null +++ b/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp @@ -0,0 +1,32 @@ +//===-- TargetInfo/AMDILTargetInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDIL.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +/// The target for the AMDIL backend +Target llvm::TheAMDILTarget; + +/// The target for the AMDGPU backend +Target llvm::TheAMDGPUTarget; + +/// Extern function to initialize the targets for the AMDIL backend +extern "C" void LLVMInitializeAMDILTargetInfo() { + RegisterTarget<Triple::amdil, false> + IL(TheAMDILTarget, "amdil", "ATI graphics cards"); + + RegisterTarget<Triple::r600, false> + R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX"); +} diff --git a/lib/Target/AMDIL/TargetInfo/CMakeLists.txt b/lib/Target/AMDIL/TargetInfo/CMakeLists.txt new file mode 100644 index 00000000000..15ff3f9b073 --- /dev/null +++ b/lib/Target/AMDIL/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAMDILInfo + AMDILTargetInfo.cpp + ) + +add_dependencies(LLVMAMDILInfo AMDILCodeGenTable_gen) diff --git a/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt b/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt new file mode 100644 index 00000000000..746d7cd7c72 --- /dev/null +++ b/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AMDIL/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AMDILInfo
parent = AMDIL
required_libraries = MC Support
add_to_library_groups = AMDIL

##===- lib/Target/AMDIL/TargetInfo/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMAMDILInfo

# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common