diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-01-04 15:38:37 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-01-04 21:05:09 +0000 |
commit | aed37cbee8efb59b2f1a6bc69adcbaecd9e4fa13 (patch) | |
tree | 5748d373dc01011b860049208135c3d942882e29 | |
parent | 05c143cc049a87c515ecdc5695e5912da60cf5cb (diff) |
radeon/llvm: Remove backend code from Mesa
This code now lives in an external tree.
For the next Mesa release fetch the code from the master branch
of this LLVM repo:
http://cgit.freedesktop.org/~tstellar/llvm/
For all subsequent Mesa releases, fetch the code from the official LLVM
project:
www.llvm.org
99 files changed, 0 insertions, 19168 deletions
diff --git a/src/gallium/drivers/radeon/.gitignore b/src/gallium/drivers/radeon/.gitignore deleted file mode 100644 index b723d73ff40..00000000000 --- a/src/gallium/drivers/radeon/.gitignore +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | AMDGPUInstrEnums.h.include | ||
2 | AMDGPUInstrEnums.include | ||
3 | AMDGPUInstrEnums.td | ||
4 | AMDILGenAsmWriter.inc | ||
5 | AMDILGenCallingConv.inc | ||
6 | AMDILGenCodeEmitter.inc | ||
7 | AMDILGenDAGISel.inc | ||
8 | AMDILGenEDInfo.inc | ||
9 | AMDILGenInstrInfo.inc | ||
10 | AMDILGenIntrinsics.inc | ||
11 | AMDILGenRegisterInfo.inc | ||
12 | AMDILGenSubtargetInfo.inc | ||
13 | R600HwRegInfo.include | ||
14 | R600Intrinsics.td | ||
15 | R600RegisterInfo.td | ||
16 | SIRegisterGetHWRegNum.inc | ||
17 | SIRegisterInfo.td | ||
18 | loader | ||
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h deleted file mode 100644 index f484caa9a3e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ /dev/null | |||
@@ -1,46 +0,0 @@ | |||
1 | //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | #ifndef AMDGPU_H | ||
11 | #define AMDGPU_H | ||
12 | |||
13 | #include "AMDGPUTargetMachine.h" | ||
14 | #include "llvm/Support/TargetRegistry.h" | ||
15 | #include "llvm/Target/TargetMachine.h" | ||
16 | |||
17 | namespace llvm { | ||
18 | |||
19 | class FunctionPass; | ||
20 | class AMDGPUTargetMachine; | ||
21 | |||
22 | // R600 Passes | ||
23 | FunctionPass* createR600KernelParametersPass(const TargetData* TD); | ||
24 | FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); | ||
25 | |||
26 | // SI Passes | ||
27 | FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); | ||
28 | FunctionPass *createSILowerFlowControlPass(TargetMachine &tm); | ||
29 | FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); | ||
30 | FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); | ||
31 | |||
32 | // Passes common to R600 and SI | ||
33 | FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); | ||
34 | |||
35 | } // End namespace llvm | ||
36 | |||
37 | namespace ShaderType { | ||
38 | enum Type { | ||
39 | PIXEL = 0, | ||
40 | VERTEX = 1, | ||
41 | GEOMETRY = 2, | ||
42 | COMPUTE = 3 | ||
43 | }; | ||
44 | } | ||
45 | |||
46 | #endif // AMDGPU_H | ||
diff --git a/src/gallium/drivers/radeon/AMDGPU.td b/src/gallium/drivers/radeon/AMDGPU.td deleted file mode 100644 index 5086f63d79c..00000000000 --- a/src/gallium/drivers/radeon/AMDGPU.td +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | //===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | |||
10 | // Include AMDIL TD files | ||
11 | include "AMDILBase.td" | ||
12 | |||
13 | |||
14 | def AMDGPUInstrInfo : InstrInfo {} | ||
15 | |||
16 | //===----------------------------------------------------------------------===// | ||
17 | // Declare the target which we are implementing | ||
18 | //===----------------------------------------------------------------------===// | ||
19 | def AMDGPUAsmWriter : AsmWriter { | ||
20 | string AsmWriterClassName = "InstPrinter"; | ||
21 | int Variant = 0; | ||
22 | bit isMCAsmWriter = 1; | ||
23 | } | ||
24 | |||
25 | def AMDGPU : Target { | ||
26 | // Pull in Instruction Info: | ||
27 | let InstructionSet = AMDGPUInstrInfo; | ||
28 | let AssemblyWriters = [AMDGPUAsmWriter]; | ||
29 | } | ||
30 | |||
31 | // Include AMDGPU TD files | ||
32 | include "R600Schedule.td" | ||
33 | include "SISchedule.td" | ||
34 | include "Processors.td" | ||
35 | include "AMDGPUInstrInfo.td" | ||
36 | include "AMDGPUIntrinsics.td" | ||
37 | include "AMDGPURegisterInfo.td" | ||
38 | include "AMDGPUInstructions.td" | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp deleted file mode 100644 index 392791cd49d..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The AMDGPUAsmPrinter is used to print both assembly string and also binary | ||
11 | // code. When passed an MCAsmStreamer it prints assembly and when passed | ||
12 | // an MCObjectStreamer it outputs binary code. | ||
13 | // | ||
14 | //===----------------------------------------------------------------------===// | ||
15 | // | ||
16 | |||
17 | |||
18 | #include "AMDGPUAsmPrinter.h" | ||
19 | #include "AMDGPU.h" | ||
20 | #include "SIMachineFunctionInfo.h" | ||
21 | #include "SIRegisterInfo.h" | ||
22 | #include "llvm/MC/MCStreamer.h" | ||
23 | #include "llvm/Target/TargetLoweringObjectFile.h" | ||
24 | #include "llvm/Support/TargetRegistry.h" | ||
25 | |||
26 | using namespace llvm; | ||
27 | |||
28 | |||
29 | static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, | ||
30 | MCStreamer &Streamer) { | ||
31 | return new AMDGPUAsmPrinter(tm, Streamer); | ||
32 | } | ||
33 | |||
34 | extern "C" void LLVMInitializeAMDGPUAsmPrinter() { | ||
35 | TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); | ||
36 | } | ||
37 | |||
38 | /// runOnMachineFunction - We need to override this function so we can avoid | ||
39 | /// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. | ||
40 | bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { | ||
41 | const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); | ||
42 | if (STM.dumpCode()) { | ||
43 | MF.dump(); | ||
44 | } | ||
45 | SetupMachineFunction(MF); | ||
46 | if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { | ||
47 | EmitProgramInfo(MF); | ||
48 | } | ||
49 | OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); | ||
50 | EmitFunctionBody(); | ||
51 | return false; | ||
52 | } | ||
53 | |||
54 | void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { | ||
55 | unsigned MaxSGPR = 0; | ||
56 | unsigned MaxVGPR = 0; | ||
57 | bool VCCUsed = false; | ||
58 | const SIRegisterInfo * RI = | ||
59 | static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); | ||
60 | |||
61 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||
62 | BB != BB_E; ++BB) { | ||
63 | MachineBasicBlock &MBB = *BB; | ||
64 | for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); | ||
65 | I != E; ++I) { | ||
66 | MachineInstr &MI = *I; | ||
67 | |||
68 | unsigned numOperands = MI.getNumOperands(); | ||
69 | for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { | ||
70 | MachineOperand & MO = MI.getOperand(op_idx); | ||
71 | unsigned maxUsed; | ||
72 | unsigned width = 0; | ||
73 | bool isSGPR = false; | ||
74 | unsigned reg; | ||
75 | unsigned hwReg; | ||
76 | if (!MO.isReg()) { | ||
77 | continue; | ||
78 | } | ||
79 | reg = MO.getReg(); | ||
80 | if (reg == AMDGPU::VCC) { | ||
81 | VCCUsed = true; | ||
82 | continue; | ||
83 | } | ||
84 | switch (reg) { | ||
85 | default: break; | ||
86 | case AMDGPU::EXEC: | ||
87 | case AMDGPU::SI_LITERAL_CONSTANT: | ||
88 | case AMDGPU::SREG_LIT_0: | ||
89 | case AMDGPU::M0: | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | if (AMDGPU::SReg_32RegClass.contains(reg)) { | ||
94 | isSGPR = true; | ||
95 | width = 1; | ||
96 | } else if (AMDGPU::VReg_32RegClass.contains(reg)) { | ||
97 | isSGPR = false; | ||
98 | width = 1; | ||
99 | } else if (AMDGPU::SReg_64RegClass.contains(reg)) { | ||
100 | isSGPR = true; | ||
101 | width = 2; | ||
102 | } else if (AMDGPU::VReg_64RegClass.contains(reg)) { | ||
103 | isSGPR = false; | ||
104 | width = 2; | ||
105 | } else if (AMDGPU::SReg_128RegClass.contains(reg)) { | ||
106 | isSGPR = true; | ||
107 | width = 4; | ||
108 | } else if (AMDGPU::VReg_128RegClass.contains(reg)) { | ||
109 | isSGPR = false; | ||
110 | width = 4; | ||
111 | } else if (AMDGPU::SReg_256RegClass.contains(reg)) { | ||
112 | isSGPR = true; | ||
113 | width = 8; | ||
114 | } else { | ||
115 | assert(!"Unknown register class"); | ||
116 | } | ||
117 | hwReg = RI->getHWRegNum(reg); | ||
118 | maxUsed = hwReg + width - 1; | ||
119 | if (isSGPR) { | ||
120 | MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; | ||
121 | } else { | ||
122 | MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; | ||
123 | } | ||
124 | } | ||
125 | } | ||
126 | } | ||
127 | if (VCCUsed) { | ||
128 | MaxSGPR += 2; | ||
129 | } | ||
130 | SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||
131 | OutStreamer.EmitIntValue(MaxSGPR + 1, 4); | ||
132 | OutStreamer.EmitIntValue(MaxVGPR + 1, 4); | ||
133 | OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4); | ||
134 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h deleted file mode 100644 index b35d2e9b2ca..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | //===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // AMDGPU Assembly printer class. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef AMDGPU_ASMPRINTER_H | ||
15 | #define AMDGPU_ASMPRINTER_H | ||
16 | |||
17 | #include "llvm/CodeGen/AsmPrinter.h" | ||
18 | |||
19 | namespace llvm { | ||
20 | |||
21 | class AMDGPUAsmPrinter : public AsmPrinter { | ||
22 | |||
23 | public: | ||
24 | explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) | ||
25 | : AsmPrinter(TM, Streamer) { } | ||
26 | |||
27 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
28 | |||
29 | virtual const char *getPassName() const { | ||
30 | return "AMDGPU Assembly Printer"; | ||
31 | } | ||
32 | |||
33 | /// EmitProgramInfo - Emit register usage information so that the GPU driver | ||
34 | /// can correctly setup the GPU state. | ||
35 | void EmitProgramInfo(MachineFunction &MF); | ||
36 | |||
37 | /// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp | ||
38 | virtual void EmitInstruction(const MachineInstr *MI); | ||
39 | }; | ||
40 | |||
41 | } // End anonymous llvm | ||
42 | |||
43 | #endif //AMDGPU_ASMPRINTER_H | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h b/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h deleted file mode 100644 index f1daec19d54..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUCodeEmitter.h +++ /dev/null | |||
@@ -1,48 +0,0 @@ | |||
1 | //===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // CodeEmitter interface for R600 and SI codegen. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef AMDGPUCODEEMITTER_H | ||
15 | #define AMDGPUCODEEMITTER_H | ||
16 | |||
17 | namespace llvm { | ||
18 | |||
19 | class AMDGPUCodeEmitter { | ||
20 | public: | ||
21 | uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; | ||
22 | virtual uint64_t getMachineOpValue(const MachineInstr &MI, | ||
23 | const MachineOperand &MO) const { return 0; } | ||
24 | virtual unsigned GPR4AlignEncode(const MachineInstr &MI, | ||
25 | unsigned OpNo) const { | ||
26 | return 0; | ||
27 | } | ||
28 | virtual unsigned GPR2AlignEncode(const MachineInstr &MI, | ||
29 | unsigned OpNo) const { | ||
30 | return 0; | ||
31 | } | ||
32 | virtual uint64_t VOPPostEncode(const MachineInstr &MI, | ||
33 | uint64_t Value) const { | ||
34 | return Value; | ||
35 | } | ||
36 | virtual uint64_t i32LiteralEncode(const MachineInstr &MI, | ||
37 | unsigned OpNo) const { | ||
38 | return 0; | ||
39 | } | ||
40 | virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) | ||
41 | const { | ||
42 | return 0; | ||
43 | } | ||
44 | }; | ||
45 | |||
46 | } // End namespace llvm | ||
47 | |||
48 | #endif // AMDGPUCODEEMITTER_H | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp deleted file mode 100644 index fbca0a7b832..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | //===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This pass lowers AMDIL machine instructions to the appropriate hardware | ||
11 | // instructions. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "AMDGPU.h" | ||
16 | #include "AMDGPUInstrInfo.h" | ||
17 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
18 | |||
19 | using namespace llvm; | ||
20 | |||
21 | namespace { | ||
22 | |||
23 | class AMDGPUConvertToISAPass : public MachineFunctionPass { | ||
24 | |||
25 | private: | ||
26 | static char ID; | ||
27 | TargetMachine &TM; | ||
28 | |||
29 | public: | ||
30 | AMDGPUConvertToISAPass(TargetMachine &tm) : | ||
31 | MachineFunctionPass(ID), TM(tm) { } | ||
32 | |||
33 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
34 | |||
35 | virtual const char *getPassName() const {return "AMDGPU Convert to ISA";} | ||
36 | |||
37 | }; | ||
38 | |||
39 | } // End anonymous namespace | ||
40 | |||
41 | char AMDGPUConvertToISAPass::ID = 0; | ||
42 | |||
43 | FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) { | ||
44 | return new AMDGPUConvertToISAPass(tm); | ||
45 | } | ||
46 | |||
47 | bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) | ||
48 | { | ||
49 | const AMDGPUInstrInfo * TII = | ||
50 | static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo()); | ||
51 | |||
52 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||
53 | BB != BB_E; ++BB) { | ||
54 | MachineBasicBlock &MBB = *BB; | ||
55 | for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); | ||
56 | I != E; ++I) { | ||
57 | MachineInstr &MI = *I; | ||
58 | TII->convertToISA(MI, MF, MBB.findDebugLoc(I)); | ||
59 | } | ||
60 | } | ||
61 | return false; | ||
62 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp deleted file mode 100644 index d37df6b986a..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ /dev/null | |||
@@ -1,351 +0,0 @@ | |||
1 | //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This is the parent TargetLowering class for hardware code gen targets. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPUISelLowering.h" | ||
15 | #include "AMDILIntrinsicInfo.h" | ||
16 | #include "llvm/CodeGen/MachineFunction.h" | ||
17 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
18 | #include "llvm/CodeGen/SelectionDAG.h" | ||
19 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | ||
20 | |||
21 | using namespace llvm; | ||
22 | |||
23 | AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : | ||
24 | TargetLowering(TM, new TargetLoweringObjectFileELF()) | ||
25 | { | ||
26 | |||
27 | // Initialize target lowering borrowed from AMDIL | ||
28 | InitAMDILLowering(); | ||
29 | |||
30 | // We need to custom lower some of the intrinsics | ||
31 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | ||
32 | |||
33 | // Library functions. These default to Expand, but we have instructions | ||
34 | // for them. | ||
35 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); | ||
36 | setOperationAction(ISD::FEXP2, MVT::f32, Legal); | ||
37 | setOperationAction(ISD::FPOW, MVT::f32, Legal); | ||
38 | setOperationAction(ISD::FLOG2, MVT::f32, Legal); | ||
39 | setOperationAction(ISD::FABS, MVT::f32, Legal); | ||
40 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); | ||
41 | setOperationAction(ISD::FRINT, MVT::f32, Legal); | ||
42 | |||
43 | setOperationAction(ISD::UDIV, MVT::i32, Expand); | ||
44 | setOperationAction(ISD::UDIVREM, MVT::i32, Custom); | ||
45 | setOperationAction(ISD::UREM, MVT::i32, Expand); | ||
46 | } | ||
47 | |||
48 | //===---------------------------------------------------------------------===// | ||
49 | // TargetLowering Callbacks | ||
50 | //===---------------------------------------------------------------------===// | ||
51 | |||
52 | SDValue AMDGPUTargetLowering::LowerFormalArguments( | ||
53 | SDValue Chain, | ||
54 | CallingConv::ID CallConv, | ||
55 | bool isVarArg, | ||
56 | const SmallVectorImpl<ISD::InputArg> &Ins, | ||
57 | DebugLoc DL, SelectionDAG &DAG, | ||
58 | SmallVectorImpl<SDValue> &InVals) const | ||
59 | { | ||
60 | for (unsigned i = 0, e = Ins.size(); i < e; ++i) { | ||
61 | InVals.push_back(SDValue()); | ||
62 | } | ||
63 | return Chain; | ||
64 | } | ||
65 | |||
66 | SDValue AMDGPUTargetLowering::LowerReturn( | ||
67 | SDValue Chain, | ||
68 | CallingConv::ID CallConv, | ||
69 | bool isVarArg, | ||
70 | const SmallVectorImpl<ISD::OutputArg> &Outs, | ||
71 | const SmallVectorImpl<SDValue> &OutVals, | ||
72 | DebugLoc DL, SelectionDAG &DAG) const | ||
73 | { | ||
74 | return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); | ||
75 | } | ||
76 | |||
77 | //===---------------------------------------------------------------------===// | ||
78 | // Target specific lowering | ||
79 | //===---------------------------------------------------------------------===// | ||
80 | |||
81 | SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | ||
82 | const | ||
83 | { | ||
84 | switch (Op.getOpcode()) { | ||
85 | default: | ||
86 | Op.getNode()->dump(); | ||
87 | assert(0 && "Custom lowering code for this" | ||
88 | "instruction is not implemented yet!"); | ||
89 | break; | ||
90 | // AMDIL DAG lowering | ||
91 | case ISD::SDIV: return LowerSDIV(Op, DAG); | ||
92 | case ISD::SREM: return LowerSREM(Op, DAG); | ||
93 | case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); | ||
94 | case ISD::BRCOND: return LowerBRCOND(Op, DAG); | ||
95 | // AMDGPU DAG lowering | ||
96 | case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); | ||
97 | case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); | ||
98 | } | ||
99 | return Op; | ||
100 | } | ||
101 | |||
102 | SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, | ||
103 | SelectionDAG &DAG) const | ||
104 | { | ||
105 | unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | ||
106 | DebugLoc DL = Op.getDebugLoc(); | ||
107 | EVT VT = Op.getValueType(); | ||
108 | |||
109 | switch (IntrinsicID) { | ||
110 | default: return Op; | ||
111 | case AMDGPUIntrinsic::AMDIL_abs: | ||
112 | return LowerIntrinsicIABS(Op, DAG); | ||
113 | case AMDGPUIntrinsic::AMDIL_exp: | ||
114 | return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); | ||
115 | case AMDGPUIntrinsic::AMDGPU_lrp: | ||
116 | return LowerIntrinsicLRP(Op, DAG); | ||
117 | case AMDGPUIntrinsic::AMDIL_fraction: | ||
118 | return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); | ||
119 | case AMDGPUIntrinsic::AMDIL_mad: | ||
120 | return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), | ||
121 | Op.getOperand(2), Op.getOperand(3)); | ||
122 | case AMDGPUIntrinsic::AMDIL_max: | ||
123 | return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), | ||
124 | Op.getOperand(2)); | ||
125 | case AMDGPUIntrinsic::AMDGPU_imax: | ||
126 | return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), | ||
127 | Op.getOperand(2)); | ||
128 | case AMDGPUIntrinsic::AMDGPU_umax: | ||
129 | return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), | ||
130 | Op.getOperand(2)); | ||
131 | case AMDGPUIntrinsic::AMDIL_min: | ||
132 | return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), | ||
133 | Op.getOperand(2)); | ||
134 | case AMDGPUIntrinsic::AMDGPU_imin: | ||
135 | return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), | ||
136 | Op.getOperand(2)); | ||
137 | case AMDGPUIntrinsic::AMDGPU_umin: | ||
138 | return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), | ||
139 | Op.getOperand(2)); | ||
140 | case AMDGPUIntrinsic::AMDIL_round_nearest: | ||
141 | return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | ///IABS(a) = SMAX(sub(0, a), a) | ||
146 | SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, | ||
147 | SelectionDAG &DAG) const | ||
148 | { | ||
149 | |||
150 | DebugLoc DL = Op.getDebugLoc(); | ||
151 | EVT VT = Op.getValueType(); | ||
152 | SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), | ||
153 | Op.getOperand(1)); | ||
154 | |||
155 | return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); | ||
156 | } | ||
157 | |||
158 | /// Linear Interpolation | ||
159 | /// LRP(a, b, c) = muladd(a, b, (1 - a) * c) | ||
160 | SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, | ||
161 | SelectionDAG &DAG) const | ||
162 | { | ||
163 | DebugLoc DL = Op.getDebugLoc(); | ||
164 | EVT VT = Op.getValueType(); | ||
165 | SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, | ||
166 | DAG.getConstantFP(1.0f, MVT::f32), | ||
167 | Op.getOperand(1)); | ||
168 | SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, | ||
169 | Op.getOperand(3)); | ||
170 | return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), | ||
171 | Op.getOperand(2), | ||
172 | OneSubAC); | ||
173 | } | ||
174 | |||
175 | |||
176 | |||
177 | SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, | ||
178 | SelectionDAG &DAG) const | ||
179 | { | ||
180 | DebugLoc DL = Op.getDebugLoc(); | ||
181 | EVT VT = Op.getValueType(); | ||
182 | |||
183 | SDValue Num = Op.getOperand(0); | ||
184 | SDValue Den = Op.getOperand(1); | ||
185 | |||
186 | SmallVector<SDValue, 8> Results; | ||
187 | |||
188 | // RCP = URECIP(Den) = 2^32 / Den + e | ||
189 | // e is rounding error. | ||
190 | SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); | ||
191 | |||
192 | // RCP_LO = umulo(RCP, Den) */ | ||
193 | SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); | ||
194 | |||
195 | // RCP_HI = mulhu (RCP, Den) */ | ||
196 | SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); | ||
197 | |||
198 | // NEG_RCP_LO = -RCP_LO | ||
199 | SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), | ||
200 | RCP_LO); | ||
201 | |||
202 | // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) | ||
203 | SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), | ||
204 | NEG_RCP_LO, RCP_LO, | ||
205 | ISD::SETEQ); | ||
206 | // Calculate the rounding error from the URECIP instruction | ||
207 | // E = mulhu(ABS_RCP_LO, RCP) | ||
208 | SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); | ||
209 | |||
210 | // RCP_A_E = RCP + E | ||
211 | SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); | ||
212 | |||
213 | // RCP_S_E = RCP - E | ||
214 | SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); | ||
215 | |||
216 | // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) | ||
217 | SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), | ||
218 | RCP_A_E, RCP_S_E, | ||
219 | ISD::SETEQ); | ||
220 | // Quotient = mulhu(Tmp0, Num) | ||
221 | SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); | ||
222 | |||
223 | // Num_S_Remainder = Quotient * Den | ||
224 | SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); | ||
225 | |||
226 | // Remainder = Num - Num_S_Remainder | ||
227 | SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); | ||
228 | |||
229 | // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) | ||
230 | SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, | ||
231 | DAG.getConstant(-1, VT), | ||
232 | DAG.getConstant(0, VT), | ||
233 | ISD::SETGE); | ||
234 | // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) | ||
235 | SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, | ||
236 | DAG.getConstant(0, VT), | ||
237 | DAG.getConstant(-1, VT), | ||
238 | DAG.getConstant(0, VT), | ||
239 | ISD::SETGE); | ||
240 | // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero | ||
241 | SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, | ||
242 | Remainder_GE_Zero); | ||
243 | |||
244 | // Calculate Division result: | ||
245 | |||
246 | // Quotient_A_One = Quotient + 1 | ||
247 | SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, | ||
248 | DAG.getConstant(1, VT)); | ||
249 | |||
250 | // Quotient_S_One = Quotient - 1 | ||
251 | SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, | ||
252 | DAG.getConstant(1, VT)); | ||
253 | |||
254 | // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) | ||
255 | SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), | ||
256 | Quotient, Quotient_A_One, ISD::SETEQ); | ||
257 | |||
258 | // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) | ||
259 | Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), | ||
260 | Quotient_S_One, Div, ISD::SETEQ); | ||
261 | |||
262 | // Calculate Rem result: | ||
263 | |||
264 | // Remainder_S_Den = Remainder - Den | ||
265 | SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); | ||
266 | |||
267 | // Remainder_A_Den = Remainder + Den | ||
268 | SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); | ||
269 | |||
270 | // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) | ||
271 | SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), | ||
272 | Remainder, Remainder_S_Den, ISD::SETEQ); | ||
273 | |||
274 | // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) | ||
275 | Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), | ||
276 | Remainder_A_Den, Rem, ISD::SETEQ); | ||
277 | |||
278 | DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); | ||
279 | DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); | ||
280 | |||
281 | return Op; | ||
282 | } | ||
283 | |||
284 | //===----------------------------------------------------------------------===// | ||
285 | // Helper functions | ||
286 | //===----------------------------------------------------------------------===// | ||
287 | |||
288 | bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const | ||
289 | { | ||
290 | if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { | ||
291 | return CFP->isExactlyValue(1.0); | ||
292 | } | ||
293 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { | ||
294 | return C->isAllOnesValue(); | ||
295 | } | ||
296 | return false; | ||
297 | } | ||
298 | |||
299 | bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const | ||
300 | { | ||
301 | if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { | ||
302 | return CFP->getValueAPF().isZero(); | ||
303 | } | ||
304 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { | ||
305 | return C->isNullValue(); | ||
306 | } | ||
307 | return false; | ||
308 | } | ||
309 | |||
310 | SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, | ||
311 | const TargetRegisterClass *RC, | ||
312 | unsigned Reg, EVT VT) const { | ||
313 | MachineFunction &MF = DAG.getMachineFunction(); | ||
314 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||
315 | unsigned VirtualRegister; | ||
316 | if (!MRI.isLiveIn(Reg)) { | ||
317 | VirtualRegister = MRI.createVirtualRegister(RC); | ||
318 | MRI.addLiveIn(Reg, VirtualRegister); | ||
319 | } else { | ||
320 | VirtualRegister = MRI.getLiveInVirtReg(Reg); | ||
321 | } | ||
322 | return DAG.getRegister(VirtualRegister, VT); | ||
323 | } | ||
324 | |||
325 | #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; | ||
326 | |||
327 | const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const | ||
328 | { | ||
329 | switch (Opcode) { | ||
330 | default: return 0; | ||
331 | // AMDIL DAG nodes | ||
332 | NODE_NAME_CASE(MAD); | ||
333 | NODE_NAME_CASE(CALL); | ||
334 | NODE_NAME_CASE(UMUL); | ||
335 | NODE_NAME_CASE(DIV_INF); | ||
336 | NODE_NAME_CASE(RET_FLAG); | ||
337 | NODE_NAME_CASE(BRANCH_COND); | ||
338 | |||
339 | // AMDGPU DAG nodes | ||
340 | NODE_NAME_CASE(FRACT) | ||
341 | NODE_NAME_CASE(FMAX) | ||
342 | NODE_NAME_CASE(SMAX) | ||
343 | NODE_NAME_CASE(UMAX) | ||
344 | NODE_NAME_CASE(FMIN) | ||
345 | NODE_NAME_CASE(SMIN) | ||
346 | NODE_NAME_CASE(UMIN) | ||
347 | NODE_NAME_CASE(URECIP) | ||
348 | NODE_NAME_CASE(INTERP) | ||
349 | NODE_NAME_CASE(INTERP_P0) | ||
350 | } | ||
351 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h deleted file mode 100644 index 2d8ed82c117..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ /dev/null | |||
@@ -1,142 +0,0 @@ | |||
1 | //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the interface defintiion of the TargetLowering class | ||
11 | // that is common to all AMD GPUs. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #ifndef AMDGPUISELLOWERING_H | ||
16 | #define AMDGPUISELLOWERING_H | ||
17 | |||
18 | #include "llvm/Target/TargetLowering.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | |||
22 | class MachineRegisterInfo; | ||
23 | |||
24 | class AMDGPUTargetLowering : public TargetLowering | ||
25 | { | ||
26 | private: | ||
27 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; | ||
28 | SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; | ||
29 | |||
30 | protected: | ||
31 | |||
32 | /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list | ||
33 | /// of the DAG's MachineFunction. This returns a Register SDNode representing | ||
34 | /// Reg. | ||
35 | SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, | ||
36 | unsigned Reg, EVT VT) const; | ||
37 | |||
38 | bool isHWTrueValue(SDValue Op) const; | ||
39 | bool isHWFalseValue(SDValue Op) const; | ||
40 | |||
41 | public: | ||
42 | AMDGPUTargetLowering(TargetMachine &TM); | ||
43 | |||
44 | virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, | ||
45 | bool isVarArg, | ||
46 | const SmallVectorImpl<ISD::InputArg> &Ins, | ||
47 | DebugLoc DL, SelectionDAG &DAG, | ||
48 | SmallVectorImpl<SDValue> &InVals) const; | ||
49 | |||
50 | virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, | ||
51 | bool isVarArg, | ||
52 | const SmallVectorImpl<ISD::OutputArg> &Outs, | ||
53 | const SmallVectorImpl<SDValue> &OutVals, | ||
54 | DebugLoc DL, SelectionDAG &DAG) const; | ||
55 | |||
56 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; | ||
57 | SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; | ||
58 | SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; | ||
59 | virtual const char* getTargetNodeName(unsigned Opcode) const; | ||
60 | |||
61 | // Functions defined in AMDILISelLowering.cpp | ||
62 | public: | ||
63 | |||
64 | /// computeMaskedBitsForTargetNode - Determine which of the bits specified | ||
65 | /// in Mask are known to be either zero or one and return them in the | ||
66 | /// KnownZero/KnownOne bitsets. | ||
67 | virtual void computeMaskedBitsForTargetNode(const SDValue Op, | ||
68 | APInt &KnownZero, | ||
69 | APInt &KnownOne, | ||
70 | const SelectionDAG &DAG, | ||
71 | unsigned Depth = 0) const; | ||
72 | |||
73 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, | ||
74 | const CallInst &I, unsigned Intrinsic) const; | ||
75 | |||
76 | /// isFPImmLegal - We want to mark f32/f64 floating point values as legal. | ||
77 | bool isFPImmLegal(const APFloat &Imm, EVT VT) const; | ||
78 | |||
79 | /// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants. | ||
80 | bool ShouldShrinkFPConstant(EVT VT) const; | ||
81 | |||
82 | private: | ||
83 | void InitAMDILLowering(); | ||
84 | SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; | ||
85 | SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; | ||
86 | SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const; | ||
87 | SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; | ||
88 | SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; | ||
89 | SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; | ||
90 | SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; | ||
91 | SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; | ||
92 | SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; | ||
93 | SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; | ||
94 | EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; | ||
95 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; | ||
96 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; | ||
97 | }; | ||
98 | |||
99 | namespace AMDGPUISD | ||
100 | { | ||
101 | |||
102 | enum | ||
103 | { | ||
104 | // AMDIL ISD Opcodes | ||
105 | FIRST_NUMBER = ISD::BUILTIN_OP_END, | ||
106 | MAD, // 32bit Fused Multiply Add instruction | ||
107 | CALL, // Function call based on a single integer | ||
108 | UMUL, // 32bit unsigned multiplication | ||
109 | DIV_INF, // Divide with infinity returned on zero divisor | ||
110 | RET_FLAG, | ||
111 | BRANCH_COND, | ||
112 | // End AMDIL ISD Opcodes | ||
113 | BITALIGN, | ||
114 | FRACT, | ||
115 | FMAX, | ||
116 | SMAX, | ||
117 | UMAX, | ||
118 | FMIN, | ||
119 | SMIN, | ||
120 | UMIN, | ||
121 | URECIP, | ||
122 | INTERP, | ||
123 | INTERP_P0, | ||
124 | LAST_AMDGPU_ISD_NUMBER | ||
125 | }; | ||
126 | |||
127 | |||
128 | } // End namespace AMDGPUISD | ||
129 | |||
130 | namespace SIISD { | ||
131 | |||
132 | enum { | ||
133 | SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, | ||
134 | VCC_AND, | ||
135 | VCC_BITCAST | ||
136 | }; | ||
137 | |||
138 | } // End namespace SIISD | ||
139 | |||
140 | } // End namespace llvm | ||
141 | |||
142 | #endif // AMDGPUISELLOWERING_H | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp deleted file mode 100644 index 9aae09a4a15..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp +++ /dev/null | |||
@@ -1,258 +0,0 @@ | |||
1 | //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the implementation of the TargetInstrInfo class that is | ||
11 | // common to all AMD GPUs. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "AMDGPUInstrInfo.h" | ||
16 | #include "AMDGPURegisterInfo.h" | ||
17 | #include "AMDGPUTargetMachine.h" | ||
18 | #include "AMDIL.h" | ||
19 | #include "AMDILUtilityFunctions.h" | ||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
22 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
23 | |||
24 | #define GET_INSTRINFO_CTOR | ||
25 | #include "AMDGPUGenInstrInfo.inc" | ||
26 | |||
27 | using namespace llvm; | ||
28 | |||
29 | AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm) | ||
30 | : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { } | ||
31 | |||
32 | const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { | ||
33 | return RI; | ||
34 | } | ||
35 | |||
36 | bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, | ||
37 | unsigned &SrcReg, unsigned &DstReg, | ||
38 | unsigned &SubIdx) const { | ||
39 | // TODO: Implement this function | ||
40 | return false; | ||
41 | } | ||
42 | |||
43 | unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, | ||
44 | int &FrameIndex) const { | ||
45 | // TODO: Implement this function | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, | ||
50 | int &FrameIndex) const { | ||
51 | // TODO: Implement this function | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, | ||
56 | const MachineMemOperand *&MMO, | ||
57 | int &FrameIndex) const { | ||
58 | // TODO: Implement this function | ||
59 | return false; | ||
60 | } | ||
61 | unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI, | ||
62 | int &FrameIndex) const { | ||
63 | // TODO: Implement this function | ||
64 | return 0; | ||
65 | } | ||
66 | unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI, | ||
67 | int &FrameIndex) const { | ||
68 | // TODO: Implement this function | ||
69 | return 0; | ||
70 | } | ||
71 | bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, | ||
72 | const MachineMemOperand *&MMO, | ||
73 | int &FrameIndex) const { | ||
74 | // TODO: Implement this function | ||
75 | return false; | ||
76 | } | ||
77 | |||
78 | MachineInstr * | ||
79 | AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, | ||
80 | MachineBasicBlock::iterator &MBBI, | ||
81 | LiveVariables *LV) const { | ||
82 | // TODO: Implement this function | ||
83 | return NULL; | ||
84 | } | ||
85 | bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, | ||
86 | MachineBasicBlock &MBB) const { | ||
87 | while (iter != MBB.end()) { | ||
88 | switch (iter->getOpcode()) { | ||
89 | default: | ||
90 | break; | ||
91 | ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); | ||
92 | case AMDGPU::BRANCH: | ||
93 | return true; | ||
94 | }; | ||
95 | ++iter; | ||
96 | } | ||
97 | return false; | ||
98 | } | ||
99 | |||
100 | MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { | ||
101 | MachineBasicBlock::iterator tmp = MBB->end(); | ||
102 | if (!MBB->size()) { | ||
103 | return MBB->end(); | ||
104 | } | ||
105 | while (--tmp) { | ||
106 | if (tmp->getOpcode() == AMDGPU::ENDLOOP | ||
107 | || tmp->getOpcode() == AMDGPU::ENDIF | ||
108 | || tmp->getOpcode() == AMDGPU::ELSE) { | ||
109 | if (tmp == MBB->begin()) { | ||
110 | return tmp; | ||
111 | } else { | ||
112 | continue; | ||
113 | } | ||
114 | } else { | ||
115 | return ++tmp; | ||
116 | } | ||
117 | } | ||
118 | return MBB->end(); | ||
119 | } | ||
120 | |||
121 | void | ||
122 | AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, | ||
123 | MachineBasicBlock::iterator MI, | ||
124 | unsigned SrcReg, bool isKill, | ||
125 | int FrameIndex, | ||
126 | const TargetRegisterClass *RC, | ||
127 | const TargetRegisterInfo *TRI) const { | ||
128 | assert(!"Not Implemented"); | ||
129 | } | ||
130 | |||
131 | void | ||
132 | AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, | ||
133 | MachineBasicBlock::iterator MI, | ||
134 | unsigned DestReg, int FrameIndex, | ||
135 | const TargetRegisterClass *RC, | ||
136 | const TargetRegisterInfo *TRI) const { | ||
137 | assert(!"Not Implemented"); | ||
138 | } | ||
139 | |||
140 | MachineInstr * | ||
141 | AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, | ||
142 | MachineInstr *MI, | ||
143 | const SmallVectorImpl<unsigned> &Ops, | ||
144 | int FrameIndex) const { | ||
145 | // TODO: Implement this function | ||
146 | return 0; | ||
147 | } | ||
148 | MachineInstr* | ||
149 | AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, | ||
150 | MachineInstr *MI, | ||
151 | const SmallVectorImpl<unsigned> &Ops, | ||
152 | MachineInstr *LoadMI) const { | ||
153 | // TODO: Implement this function | ||
154 | return 0; | ||
155 | } | ||
156 | bool | ||
157 | AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, | ||
158 | const SmallVectorImpl<unsigned> &Ops) const | ||
159 | { | ||
160 | // TODO: Implement this function | ||
161 | return false; | ||
162 | } | ||
163 | bool | ||
164 | AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, | ||
165 | unsigned Reg, bool UnfoldLoad, | ||
166 | bool UnfoldStore, | ||
167 | SmallVectorImpl<MachineInstr*> &NewMIs) const { | ||
168 | // TODO: Implement this function | ||
169 | return false; | ||
170 | } | ||
171 | |||
172 | bool | ||
173 | AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, | ||
174 | SmallVectorImpl<SDNode*> &NewNodes) const { | ||
175 | // TODO: Implement this function | ||
176 | return false; | ||
177 | } | ||
178 | |||
179 | unsigned | ||
180 | AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, | ||
181 | bool UnfoldLoad, bool UnfoldStore, | ||
182 | unsigned *LoadRegIndex) const { | ||
183 | // TODO: Implement this function | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, | ||
188 | int64_t Offset1, int64_t Offset2, | ||
189 | unsigned NumLoads) const { | ||
190 | assert(Offset2 > Offset1 | ||
191 | && "Second offset should be larger than first offset!"); | ||
192 | // If we have less than 16 loads in a row, and the offsets are within 16, | ||
193 | // then schedule together. | ||
194 | // TODO: Make the loads schedule near if it fits in a cacheline | ||
195 | return (NumLoads < 16 && (Offset2 - Offset1) < 16); | ||
196 | } | ||
197 | |||
198 | bool | ||
199 | AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) | ||
200 | const { | ||
201 | // TODO: Implement this function | ||
202 | return true; | ||
203 | } | ||
204 | void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB, | ||
205 | MachineBasicBlock::iterator MI) const { | ||
206 | // TODO: Implement this function | ||
207 | } | ||
208 | |||
209 | bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const { | ||
210 | // TODO: Implement this function | ||
211 | return false; | ||
212 | } | ||
213 | bool | ||
214 | AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, | ||
215 | const SmallVectorImpl<MachineOperand> &Pred2) | ||
216 | const { | ||
217 | // TODO: Implement this function | ||
218 | return false; | ||
219 | } | ||
220 | |||
221 | bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI, | ||
222 | std::vector<MachineOperand> &Pred) const { | ||
223 | // TODO: Implement this function | ||
224 | return false; | ||
225 | } | ||
226 | |||
227 | bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const { | ||
228 | // TODO: Implement this function | ||
229 | return MI->getDesc().isPredicable(); | ||
230 | } | ||
231 | |||
232 | bool | ||
233 | AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { | ||
234 | // TODO: Implement this function | ||
235 | return true; | ||
236 | } | ||
237 | |||
238 | void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, | ||
239 | DebugLoc DL) const | ||
240 | { | ||
241 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||
242 | const AMDGPURegisterInfo & RI = getRegisterInfo(); | ||
243 | |||
244 | for (unsigned i = 0; i < MI.getNumOperands(); i++) { | ||
245 | MachineOperand &MO = MI.getOperand(i); | ||
246 | // Convert dst regclass to one that is supported by the ISA | ||
247 | if (MO.isReg() && MO.isDef()) { | ||
248 | if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { | ||
249 | const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); | ||
250 | const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); | ||
251 | |||
252 | assert(newRegClass); | ||
253 | |||
254 | MRI.setRegClass(MO.getReg(), newRegClass); | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h deleted file mode 100644 index a3080767883..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ /dev/null | |||
@@ -1,148 +0,0 @@ | |||
1 | //===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the definition of a TargetInstrInfo class that is common | ||
11 | // to all AMD GPUs. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #ifndef AMDGPUINSTRUCTIONINFO_H_ | ||
16 | #define AMDGPUINSTRUCTIONINFO_H_ | ||
17 | |||
18 | #include "AMDGPURegisterInfo.h" | ||
19 | #include "AMDGPUInstrInfo.h" | ||
20 | #include "llvm/Target/TargetInstrInfo.h" | ||
21 | |||
22 | #include <map> | ||
23 | |||
24 | #define GET_INSTRINFO_HEADER | ||
25 | #define GET_INSTRINFO_ENUM | ||
26 | #include "AMDGPUGenInstrInfo.inc" | ||
27 | |||
28 | #define OPCODE_IS_ZERO_INT 0x00000042 | ||
29 | #define OPCODE_IS_NOT_ZERO_INT 0x00000045 | ||
30 | #define OPCODE_IS_ZERO 0x00000020 | ||
31 | #define OPCODE_IS_NOT_ZERO 0x00000023 | ||
32 | |||
33 | namespace llvm { | ||
34 | |||
35 | class AMDGPUTargetMachine; | ||
36 | class MachineFunction; | ||
37 | class MachineInstr; | ||
38 | class MachineInstrBuilder; | ||
39 | |||
40 | class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { | ||
41 | private: | ||
42 | const AMDGPURegisterInfo RI; | ||
43 | TargetMachine &TM; | ||
44 | bool getNextBranchInstr(MachineBasicBlock::iterator &iter, | ||
45 | MachineBasicBlock &MBB) const; | ||
46 | public: | ||
47 | explicit AMDGPUInstrInfo(TargetMachine &tm); | ||
48 | |||
49 | virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; | ||
50 | |||
51 | bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, | ||
52 | unsigned &DstReg, unsigned &SubIdx) const; | ||
53 | |||
54 | unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; | ||
55 | unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, | ||
56 | int &FrameIndex) const; | ||
57 | bool hasLoadFromStackSlot(const MachineInstr *MI, | ||
58 | const MachineMemOperand *&MMO, | ||
59 | int &FrameIndex) const; | ||
60 | unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; | ||
61 | unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, | ||
62 | int &FrameIndex) const; | ||
63 | bool hasStoreFromStackSlot(const MachineInstr *MI, | ||
64 | const MachineMemOperand *&MMO, | ||
65 | int &FrameIndex) const; | ||
66 | |||
67 | MachineInstr * | ||
68 | convertToThreeAddress(MachineFunction::iterator &MFI, | ||
69 | MachineBasicBlock::iterator &MBBI, | ||
70 | LiveVariables *LV) const; | ||
71 | |||
72 | |||
73 | virtual void copyPhysReg(MachineBasicBlock &MBB, | ||
74 | MachineBasicBlock::iterator MI, DebugLoc DL, | ||
75 | unsigned DestReg, unsigned SrcReg, | ||
76 | bool KillSrc) const = 0; | ||
77 | |||
78 | void storeRegToStackSlot(MachineBasicBlock &MBB, | ||
79 | MachineBasicBlock::iterator MI, | ||
80 | unsigned SrcReg, bool isKill, int FrameIndex, | ||
81 | const TargetRegisterClass *RC, | ||
82 | const TargetRegisterInfo *TRI) const; | ||
83 | void loadRegFromStackSlot(MachineBasicBlock &MBB, | ||
84 | MachineBasicBlock::iterator MI, | ||
85 | unsigned DestReg, int FrameIndex, | ||
86 | const TargetRegisterClass *RC, | ||
87 | const TargetRegisterInfo *TRI) const; | ||
88 | |||
89 | protected: | ||
90 | MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, | ||
91 | MachineInstr *MI, | ||
92 | const SmallVectorImpl<unsigned> &Ops, | ||
93 | int FrameIndex) const; | ||
94 | MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, | ||
95 | MachineInstr *MI, | ||
96 | const SmallVectorImpl<unsigned> &Ops, | ||
97 | MachineInstr *LoadMI) const; | ||
98 | public: | ||
99 | bool canFoldMemoryOperand(const MachineInstr *MI, | ||
100 | const SmallVectorImpl<unsigned> &Ops) const; | ||
101 | bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, | ||
102 | unsigned Reg, bool UnfoldLoad, bool UnfoldStore, | ||
103 | SmallVectorImpl<MachineInstr *> &NewMIs) const; | ||
104 | bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, | ||
105 | SmallVectorImpl<SDNode *> &NewNodes) const; | ||
106 | unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, | ||
107 | bool UnfoldLoad, bool UnfoldStore, | ||
108 | unsigned *LoadRegIndex = 0) const; | ||
109 | bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, | ||
110 | int64_t Offset1, int64_t Offset2, | ||
111 | unsigned NumLoads) const; | ||
112 | |||
113 | bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; | ||
114 | void insertNoop(MachineBasicBlock &MBB, | ||
115 | MachineBasicBlock::iterator MI) const; | ||
116 | bool isPredicated(const MachineInstr *MI) const; | ||
117 | bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, | ||
118 | const SmallVectorImpl<MachineOperand> &Pred2) const; | ||
119 | bool DefinesPredicate(MachineInstr *MI, | ||
120 | std::vector<MachineOperand> &Pred) const; | ||
121 | bool isPredicable(MachineInstr *MI) const; | ||
122 | bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; | ||
123 | |||
124 | // Helper functions that check the opcode for status information | ||
125 | bool isLoadInst(llvm::MachineInstr *MI) const; | ||
126 | bool isExtLoadInst(llvm::MachineInstr *MI) const; | ||
127 | bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; | ||
128 | bool isSExtLoadInst(llvm::MachineInstr *MI) const; | ||
129 | bool isZExtLoadInst(llvm::MachineInstr *MI) const; | ||
130 | bool isAExtLoadInst(llvm::MachineInstr *MI) const; | ||
131 | bool isStoreInst(llvm::MachineInstr *MI) const; | ||
132 | bool isTruncStoreInst(llvm::MachineInstr *MI) const; | ||
133 | |||
134 | virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg, | ||
135 | int64_t Imm) const = 0; | ||
136 | virtual unsigned getIEQOpcode() const = 0; | ||
137 | virtual bool isMov(unsigned opcode) const = 0; | ||
138 | |||
139 | /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA | ||
140 | /// MachineInstr | ||
141 | virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, | ||
142 | DebugLoc DL) const; | ||
143 | |||
144 | }; | ||
145 | |||
146 | } // End llvm namespace | ||
147 | |||
148 | #endif // AMDGPUINSTRINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td deleted file mode 100644 index 23ca35aadc2..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td +++ /dev/null | |||
@@ -1,71 +0,0 @@ | |||
1 | //===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains DAG node defintions for the AMDGPU target. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | //===----------------------------------------------------------------------===// | ||
15 | // AMDGPU DAG Profiles | ||
16 | //===----------------------------------------------------------------------===// | ||
17 | |||
18 | def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ | ||
19 | SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> | ||
20 | ]>; | ||
21 | |||
22 | //===----------------------------------------------------------------------===// | ||
23 | // AMDGPU DAG Nodes | ||
24 | // | ||
25 | |||
26 | // out = ((a << 32) | b) >> c) | ||
27 | // | ||
28 | // Can be used to optimize rtol: | ||
29 | // rotl(a, b) = bitalign(a, a, 32 - b) | ||
30 | def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>; | ||
31 | |||
32 | // out = a - floor(a) | ||
33 | def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; | ||
34 | |||
35 | // out = max(a, b) a and b are floats | ||
36 | def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, | ||
37 | [SDNPCommutative, SDNPAssociative] | ||
38 | >; | ||
39 | |||
40 | // out = max(a, b) a and b are signed ints | ||
41 | def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp, | ||
42 | [SDNPCommutative, SDNPAssociative] | ||
43 | >; | ||
44 | |||
45 | // out = max(a, b) a and b are unsigned ints | ||
46 | def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, | ||
47 | [SDNPCommutative, SDNPAssociative] | ||
48 | >; | ||
49 | |||
50 | // out = min(a, b) a and b are floats | ||
51 | def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, | ||
52 | [SDNPCommutative, SDNPAssociative] | ||
53 | >; | ||
54 | |||
55 | // out = min(a, b) a snd b are signed ints | ||
56 | def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, | ||
57 | [SDNPCommutative, SDNPAssociative] | ||
58 | >; | ||
59 | |||
60 | // out = min(a, b) a and b are unsigned ints | ||
61 | def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, | ||
62 | [SDNPCommutative, SDNPAssociative] | ||
63 | >; | ||
64 | |||
65 | // urecip - This operation is a helper for integer division, it returns the | ||
66 | // result of 1 / a as a fractional unsigned integer. | ||
67 | // out = (2^32 / a) + e | ||
68 | // e is rounding error | ||
69 | def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; | ||
70 | |||
71 | def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>; | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td deleted file mode 100644 index 9dbdc615e2d..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ /dev/null | |||
@@ -1,183 +0,0 @@ | |||
1 | //===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains instruction defs that are common to all hw codegen | ||
11 | // targets. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { | ||
16 | field bits<16> AMDILOp = 0; | ||
17 | field bits<3> Gen = 0; | ||
18 | |||
19 | let Namespace = "AMDGPU"; | ||
20 | let OutOperandList = outs; | ||
21 | let InOperandList = ins; | ||
22 | let AsmString = asm; | ||
23 | let Pattern = pattern; | ||
24 | let Itinerary = NullALU; | ||
25 | let TSFlags{42-40} = Gen; | ||
26 | let TSFlags{63-48} = AMDILOp; | ||
27 | } | ||
28 | |||
29 | class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> | ||
30 | : AMDGPUInst<outs, ins, asm, pattern> { | ||
31 | |||
32 | field bits<32> Inst = 0xffffffff; | ||
33 | |||
34 | } | ||
35 | |||
36 | def COND_EQ : PatLeaf < | ||
37 | (cond), | ||
38 | [{switch(N->get()){{default: return false; | ||
39 | case ISD::SETOEQ: case ISD::SETUEQ: | ||
40 | case ISD::SETEQ: return true;}}}] | ||
41 | >; | ||
42 | |||
43 | def COND_NE : PatLeaf < | ||
44 | (cond), | ||
45 | [{switch(N->get()){{default: return false; | ||
46 | case ISD::SETONE: case ISD::SETUNE: | ||
47 | case ISD::SETNE: return true;}}}] | ||
48 | >; | ||
49 | def COND_GT : PatLeaf < | ||
50 | (cond), | ||
51 | [{switch(N->get()){{default: return false; | ||
52 | case ISD::SETOGT: case ISD::SETUGT: | ||
53 | case ISD::SETGT: return true;}}}] | ||
54 | >; | ||
55 | |||
56 | def COND_GE : PatLeaf < | ||
57 | (cond), | ||
58 | [{switch(N->get()){{default: return false; | ||
59 | case ISD::SETOGE: case ISD::SETUGE: | ||
60 | case ISD::SETGE: return true;}}}] | ||
61 | >; | ||
62 | |||
63 | def COND_LT : PatLeaf < | ||
64 | (cond), | ||
65 | [{switch(N->get()){{default: return false; | ||
66 | case ISD::SETOLT: case ISD::SETULT: | ||
67 | case ISD::SETLT: return true;}}}] | ||
68 | >; | ||
69 | |||
70 | def COND_LE : PatLeaf < | ||
71 | (cond), | ||
72 | [{switch(N->get()){{default: return false; | ||
73 | case ISD::SETOLE: case ISD::SETULE: | ||
74 | case ISD::SETLE: return true;}}}] | ||
75 | >; | ||
76 | |||
77 | //===----------------------------------------------------------------------===// | ||
78 | // Load/Store Pattern Fragments | ||
79 | //===----------------------------------------------------------------------===// | ||
80 | |||
81 | def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ | ||
82 | return isGlobalLoad(dyn_cast<LoadSDNode>(N)); | ||
83 | }]>; | ||
84 | |||
85 | class Constants { | ||
86 | int TWO_PI = 0x40c90fdb; | ||
87 | int PI = 0x40490fdb; | ||
88 | int TWO_PI_INV = 0x3e22f983; | ||
89 | } | ||
90 | def CONST : Constants; | ||
91 | |||
92 | def FP_ZERO : PatLeaf < | ||
93 | (fpimm), | ||
94 | [{return N->getValueAPF().isZero();}] | ||
95 | >; | ||
96 | |||
97 | def FP_ONE : PatLeaf < | ||
98 | (fpimm), | ||
99 | [{return N->isExactlyValue(1.0);}] | ||
100 | >; | ||
101 | |||
102 | let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { | ||
103 | |||
104 | class CLAMP <RegisterClass rc> : AMDGPUShaderInst < | ||
105 | (outs rc:$dst), | ||
106 | (ins rc:$src0), | ||
107 | "CLAMP $dst, $src0", | ||
108 | [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] | ||
109 | >; | ||
110 | |||
111 | class FABS <RegisterClass rc> : AMDGPUShaderInst < | ||
112 | (outs rc:$dst), | ||
113 | (ins rc:$src0), | ||
114 | "FABS $dst, $src0", | ||
115 | [(set rc:$dst, (fabs rc:$src0))] | ||
116 | >; | ||
117 | |||
118 | class FNEG <RegisterClass rc> : AMDGPUShaderInst < | ||
119 | (outs rc:$dst), | ||
120 | (ins rc:$src0), | ||
121 | "FNEG $dst, $src0", | ||
122 | [(set rc:$dst, (fneg rc:$src0))] | ||
123 | >; | ||
124 | |||
125 | def SHADER_TYPE : AMDGPUShaderInst < | ||
126 | (outs), | ||
127 | (ins i32imm:$type), | ||
128 | "SHADER_TYPE $type", | ||
129 | [(int_AMDGPU_shader_type imm:$type)] | ||
130 | >; | ||
131 | |||
132 | } // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 | ||
133 | |||
134 | /* Generic helper patterns for intrinsics */ | ||
135 | /* -------------------------------------- */ | ||
136 | |||
137 | class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul, | ||
138 | RegisterClass rc> : Pat < | ||
139 | (fpow rc:$src0, rc:$src1), | ||
140 | (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) | ||
141 | >; | ||
142 | |||
143 | /* Other helper patterns */ | ||
144 | /* --------------------- */ | ||
145 | |||
146 | /* Extract element pattern */ | ||
147 | class Extract_Element <ValueType sub_type, ValueType vec_type, | ||
148 | RegisterClass vec_class, int sub_idx, | ||
149 | SubRegIndex sub_reg>: Pat< | ||
150 | (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), | ||
151 | (EXTRACT_SUBREG vec_class:$src, sub_reg) | ||
152 | >; | ||
153 | |||
154 | /* Insert element pattern */ | ||
155 | class Insert_Element <ValueType elem_type, ValueType vec_type, | ||
156 | RegisterClass elem_class, RegisterClass vec_class, | ||
157 | int sub_idx, SubRegIndex sub_reg> : Pat < | ||
158 | |||
159 | (vec_type (vector_insert (vec_type vec_class:$vec), | ||
160 | (elem_type elem_class:$elem), sub_idx)), | ||
161 | (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) | ||
162 | >; | ||
163 | |||
164 | // Vector Build pattern | ||
165 | class Vector_Build <ValueType vecType, RegisterClass vectorClass, | ||
166 | ValueType elemType, RegisterClass elemClass> : Pat < | ||
167 | (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), | ||
168 | (elemType elemClass:$z), (elemType elemClass:$w))), | ||
169 | (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG | ||
170 | (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y), | ||
171 | elemClass:$z, sel_z), elemClass:$w, sel_w) | ||
172 | >; | ||
173 | |||
174 | // bitconvert pattern | ||
175 | class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < | ||
176 | (dt (bitconvert (st rc:$src0))), | ||
177 | (dt rc:$src0) | ||
178 | >; | ||
179 | |||
180 | include "R600Instructions.td" | ||
181 | |||
182 | include "SIInstrInfo.td" | ||
183 | |||
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td deleted file mode 100644 index eaca4cf9856..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | //===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file defines intrinsics that are used by all hw codegen targets. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | let TargetPrefix = "AMDGPU", isTarget = 1 in { | ||
15 | |||
16 | def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; | ||
17 | def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; | ||
18 | def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; | ||
19 | def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; | ||
20 | def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
21 | |||
22 | def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; | ||
23 | def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
24 | def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
25 | def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; | ||
26 | def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; | ||
27 | def int_AMDGPU_kilp : Intrinsic<[], [], []>; | ||
28 | def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
29 | def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
30 | def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
31 | def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; | ||
32 | def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; | ||
33 | def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
34 | def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
35 | def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
36 | def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
37 | def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
38 | def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; | ||
39 | def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
40 | def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
41 | def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
42 | def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
43 | def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
44 | def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
45 | def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
46 | def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; | ||
47 | def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
48 | def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
49 | def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
50 | def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
51 | def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
52 | def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; | ||
53 | def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; | ||
54 | |||
55 | def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>; | ||
56 | } | ||
57 | |||
58 | let TargetPrefix = "TGSI", isTarget = 1 in { | ||
59 | |||
60 | def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>; | ||
61 | } | ||
62 | |||
63 | include "SIIntrinsics.td" | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp b/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp deleted file mode 100644 index f3d80a39c3c..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.cpp +++ /dev/null | |||
@@ -1,82 +0,0 @@ | |||
1 | //===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains code to lower AMDGPU MachineInstrs to their corresponding | ||
11 | // MCInst. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | // | ||
15 | |||
16 | #include "AMDGPUMCInstLower.h" | ||
17 | #include "AMDGPUAsmPrinter.h" | ||
18 | #include "R600InstrInfo.h" | ||
19 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||
20 | #include "llvm/CodeGen/MachineInstr.h" | ||
21 | #include "llvm/Constants.h" | ||
22 | #include "llvm/MC/MCInst.h" | ||
23 | #include "llvm/MC/MCStreamer.h" | ||
24 | #include "llvm/Support/ErrorHandling.h" | ||
25 | |||
26 | using namespace llvm; | ||
27 | |||
28 | AMDGPUMCInstLower::AMDGPUMCInstLower() { } | ||
29 | |||
30 | void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { | ||
31 | OutMI.setOpcode(MI->getOpcode()); | ||
32 | |||
33 | for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) { | ||
34 | const MachineOperand &MO = MI->getOperand(i); | ||
35 | |||
36 | MCOperand MCOp; | ||
37 | switch (MO.getType()) { | ||
38 | default: | ||
39 | llvm_unreachable("unknown operand type"); | ||
40 | case MachineOperand::MO_FPImmediate: { | ||
41 | const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); | ||
42 | assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && | ||
43 | "Only floating point immediates are supported at the moment."); | ||
44 | MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); | ||
45 | break; | ||
46 | } | ||
47 | case MachineOperand::MO_Immediate: | ||
48 | MCOp = MCOperand::CreateImm(MO.getImm()); | ||
49 | break; | ||
50 | case MachineOperand::MO_Register: | ||
51 | MCOp = MCOperand::CreateReg(MO.getReg()); | ||
52 | break; | ||
53 | } | ||
54 | OutMI.addOperand(MCOp); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { | ||
59 | AMDGPUMCInstLower MCInstLowering; | ||
60 | |||
61 | // Ignore placeholder instructions: | ||
62 | if (MI->getOpcode() == AMDGPU::MASK_WRITE) { | ||
63 | return; | ||
64 | } | ||
65 | |||
66 | if (MI->isBundle()) { | ||
67 | const MachineBasicBlock *MBB = MI->getParent(); | ||
68 | MachineBasicBlock::const_instr_iterator I = MI; | ||
69 | ++I; | ||
70 | while (I != MBB->end() && I->isInsideBundle()) { | ||
71 | MCInst MCBundleInst; | ||
72 | const MachineInstr *BundledInst = I; | ||
73 | MCInstLowering.lower(BundledInst, MCBundleInst); | ||
74 | OutStreamer.EmitInstruction(MCBundleInst); | ||
75 | ++I; | ||
76 | } | ||
77 | } else { | ||
78 | MCInst TmpInst; | ||
79 | MCInstLowering.lower(MI, TmpInst); | ||
80 | OutStreamer.EmitInstruction(TmpInst); | ||
81 | } | ||
82 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h b/src/gallium/drivers/radeon/AMDGPUMCInstLower.h deleted file mode 100644 index 3f68ff0874e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUMCInstLower.h +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | //===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | #ifndef AMDGPU_MCINSTLOWER_H | ||
11 | #define AMDGPU_MCINSTLOWER_H | ||
12 | |||
13 | namespace llvm { | ||
14 | |||
15 | class MCInst; | ||
16 | class MachineInstr; | ||
17 | |||
18 | class AMDGPUMCInstLower { | ||
19 | |||
20 | public: | ||
21 | AMDGPUMCInstLower(); | ||
22 | |||
23 | /// lower - Lower a MachineInstr to an MCInst | ||
24 | void lower(const MachineInstr *MI, MCInst &OutMI) const; | ||
25 | |||
26 | }; | ||
27 | |||
28 | } // End namespace llvm | ||
29 | |||
30 | #endif //AMDGPU_MCINSTLOWER_H | ||
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp deleted file mode 100644 index 69bda631738..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp +++ /dev/null | |||
@@ -1,50 +0,0 @@ | |||
1 | //===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Parent TargetRegisterInfo class common to all hw codegen targets. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPURegisterInfo.h" | ||
15 | #include "AMDGPUTargetMachine.h" | ||
16 | |||
17 | using namespace llvm; | ||
18 | |||
19 | AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm, | ||
20 | const TargetInstrInfo &tii) | ||
21 | : AMDGPUGenRegisterInfo(0), | ||
22 | TM(tm), | ||
23 | TII(tii) | ||
24 | { } | ||
25 | |||
26 | //===----------------------------------------------------------------------===// | ||
27 | // Function handling callbacks - Functions are a seldom used feature of GPUS, so | ||
28 | // they are not supported at this time. | ||
29 | //===----------------------------------------------------------------------===// | ||
30 | |||
31 | const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; | ||
32 | |||
33 | const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) | ||
34 | const { | ||
35 | return &CalleeSavedReg; | ||
36 | } | ||
37 | |||
38 | void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, | ||
39 | int SPAdj, | ||
40 | RegScavenger *RS) const { | ||
41 | assert(!"Subroutines not supported yet"); | ||
42 | } | ||
43 | |||
44 | unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { | ||
45 | assert(!"Subroutines not supported yet"); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | #define GET_REGINFO_TARGET_DESC | ||
50 | #include "AMDGPUGenRegisterInfo.inc" | ||
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h deleted file mode 100644 index 326610d333e..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | //===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the TargetRegisterInfo interface that is implemented | ||
11 | // by all hw codegen targets. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #ifndef AMDGPUREGISTERINFO_H_ | ||
16 | #define AMDGPUREGISTERINFO_H_ | ||
17 | |||
18 | #include "llvm/ADT/BitVector.h" | ||
19 | #include "llvm/Target/TargetRegisterInfo.h" | ||
20 | |||
21 | #define GET_REGINFO_HEADER | ||
22 | #define GET_REGINFO_ENUM | ||
23 | #include "AMDGPUGenRegisterInfo.inc" | ||
24 | |||
25 | namespace llvm { | ||
26 | |||
27 | class AMDGPUTargetMachine; | ||
28 | class TargetInstrInfo; | ||
29 | |||
30 | struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo | ||
31 | { | ||
32 | TargetMachine &TM; | ||
33 | const TargetInstrInfo &TII; | ||
34 | static const uint16_t CalleeSavedReg; | ||
35 | |||
36 | AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii); | ||
37 | |||
38 | virtual BitVector getReservedRegs(const MachineFunction &MF) const { | ||
39 | assert(!"Unimplemented"); return BitVector(); | ||
40 | } | ||
41 | |||
42 | /// getISARegClass - rc is an AMDIL reg class. This function returns the | ||
43 | /// ISA reg class that is equivalent to the given AMDIL reg class. | ||
44 | virtual const TargetRegisterClass * getISARegClass( | ||
45 | const TargetRegisterClass * rc) const { | ||
46 | assert(!"Unimplemented"); return NULL; | ||
47 | } | ||
48 | |||
49 | virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { | ||
50 | assert(!"Unimplemented"); return NULL; | ||
51 | } | ||
52 | |||
53 | const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; | ||
54 | void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, | ||
55 | RegScavenger *RS) const; | ||
56 | unsigned getFrameRegister(const MachineFunction &MF) const; | ||
57 | |||
58 | }; | ||
59 | |||
60 | } // End namespace llvm | ||
61 | |||
62 | #endif // AMDIDSAREGISTERINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td deleted file mode 100644 index 8181e023aa3..00000000000 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | //===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Tablegen register definitions common to all hw codegen targets. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | let Namespace = "AMDGPU" in { | ||
15 | def sel_x : SubRegIndex; | ||
16 | def sel_y : SubRegIndex; | ||
17 | def sel_z : SubRegIndex; | ||
18 | def sel_w : SubRegIndex; | ||
19 | } | ||
20 | |||
21 | include "R600RegisterInfo.td" | ||
22 | include "SIRegisterInfo.td" | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp b/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp deleted file mode 100644 index d4a70b6c62f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUSubtarget.cpp +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file implements the AMDGPU specific subclass of TargetSubtarget. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPUSubtarget.h" | ||
15 | |||
16 | using namespace llvm; | ||
17 | |||
18 | #define GET_SUBTARGETINFO_ENUM | ||
19 | #define GET_SUBTARGETINFO_TARGET_DESC | ||
20 | #define GET_SUBTARGETINFO_CTOR | ||
21 | #include "AMDGPUGenSubtargetInfo.inc" | ||
22 | |||
23 | AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : | ||
24 | AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) { | ||
25 | InstrItins = getInstrItineraryForCPU(CPU); | ||
26 | |||
27 | memset(CapsOverride, 0, sizeof(*CapsOverride) | ||
28 | * AMDGPUDeviceInfo::MaxNumberCapabilities); | ||
29 | // Default card | ||
30 | StringRef GPU = CPU; | ||
31 | mIs64bit = false; | ||
32 | mDefaultSize[0] = 64; | ||
33 | mDefaultSize[1] = 1; | ||
34 | mDefaultSize[2] = 1; | ||
35 | ParseSubtargetFeatures(GPU, FS); | ||
36 | mDevName = GPU; | ||
37 | mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit); | ||
38 | } | ||
39 | |||
40 | AMDGPUSubtarget::~AMDGPUSubtarget() | ||
41 | { | ||
42 | delete mDevice; | ||
43 | } | ||
44 | |||
45 | bool | ||
46 | AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const | ||
47 | { | ||
48 | assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities && | ||
49 | "Caps index is out of bounds!"); | ||
50 | return CapsOverride[caps]; | ||
51 | } | ||
52 | bool | ||
53 | AMDGPUSubtarget::is64bit() const | ||
54 | { | ||
55 | return mIs64bit; | ||
56 | } | ||
57 | bool | ||
58 | AMDGPUSubtarget::isTargetELF() const | ||
59 | { | ||
60 | return false; | ||
61 | } | ||
62 | size_t | ||
63 | AMDGPUSubtarget::getDefaultSize(uint32_t dim) const | ||
64 | { | ||
65 | if (dim > 3) { | ||
66 | return 1; | ||
67 | } else { | ||
68 | return mDefaultSize[dim]; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | std::string | ||
73 | AMDGPUSubtarget::getDataLayout() const | ||
74 | { | ||
75 | if (!mDevice) { | ||
76 | return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" | ||
77 | "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" | ||
78 | "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" | ||
79 | "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" | ||
80 | "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); | ||
81 | } | ||
82 | return mDevice->getDataLayout(); | ||
83 | } | ||
84 | |||
85 | std::string | ||
86 | AMDGPUSubtarget::getDeviceName() const | ||
87 | { | ||
88 | return mDevName; | ||
89 | } | ||
90 | const AMDGPUDevice * | ||
91 | AMDGPUSubtarget::device() const | ||
92 | { | ||
93 | return mDevice; | ||
94 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h deleted file mode 100644 index 30bda83a205..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUSubtarget.h +++ /dev/null | |||
@@ -1,66 +0,0 @@ | |||
1 | //=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file declares the AMDGPU specific subclass of TargetSubtarget. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef _AMDGPUSUBTARGET_H_ | ||
15 | #define _AMDGPUSUBTARGET_H_ | ||
16 | #include "AMDILDevice.h" | ||
17 | #include "llvm/ADT/StringExtras.h" | ||
18 | #include "llvm/ADT/StringRef.h" | ||
19 | #include "llvm/Target/TargetSubtargetInfo.h" | ||
20 | |||
21 | #define GET_SUBTARGETINFO_HEADER | ||
22 | #include "AMDGPUGenSubtargetInfo.inc" | ||
23 | |||
24 | #define MAX_CB_SIZE (1 << 16) | ||
25 | |||
26 | namespace llvm { | ||
27 | |||
28 | class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo | ||
29 | { | ||
30 | private: | ||
31 | bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities]; | ||
32 | const AMDGPUDevice *mDevice; | ||
33 | size_t mDefaultSize[3]; | ||
34 | size_t mMinimumSize[3]; | ||
35 | std::string mDevName; | ||
36 | bool mIs64bit; | ||
37 | bool mIs32on64bit; | ||
38 | bool mDumpCode; | ||
39 | bool mR600ALUInst; | ||
40 | |||
41 | InstrItineraryData InstrItins; | ||
42 | |||
43 | public: | ||
44 | AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); | ||
45 | virtual ~AMDGPUSubtarget(); | ||
46 | |||
47 | const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } | ||
48 | virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS); | ||
49 | |||
50 | bool isOverride(AMDGPUDeviceInfo::Caps) const; | ||
51 | bool is64bit() const; | ||
52 | |||
53 | // Helper functions to simplify if statements | ||
54 | bool isTargetELF() const; | ||
55 | const AMDGPUDevice* device() const; | ||
56 | std::string getDataLayout() const; | ||
57 | std::string getDeviceName() const; | ||
58 | virtual size_t getDefaultSize(uint32_t dim) const; | ||
59 | bool dumpCode() const { return mDumpCode; } | ||
60 | bool r600ALUEncoding() const { return mR600ALUInst; } | ||
61 | |||
62 | }; | ||
63 | |||
64 | } // End namespace llvm | ||
65 | |||
66 | #endif // AMDGPUSUBTARGET_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp deleted file mode 100644 index bfe9d81303b..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The AMDGPU target machine contains all of the hardware specific information | ||
11 | // needed to emit code for R600 and SI GPUs. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "AMDGPUTargetMachine.h" | ||
16 | #include "AMDGPU.h" | ||
17 | #include "R600ISelLowering.h" | ||
18 | #include "R600InstrInfo.h" | ||
19 | #include "SIISelLowering.h" | ||
20 | #include "SIInstrInfo.h" | ||
21 | #include "llvm/Analysis/Passes.h" | ||
22 | #include "llvm/Analysis/Verifier.h" | ||
23 | #include "llvm/CodeGen/MachineFunctionAnalysis.h" | ||
24 | #include "llvm/CodeGen/MachineModuleInfo.h" | ||
25 | #include "llvm/CodeGen/Passes.h" | ||
26 | #include "llvm/MC/MCAsmInfo.h" | ||
27 | #include "llvm/PassManager.h" | ||
28 | #include "llvm/Support/TargetRegistry.h" | ||
29 | #include "llvm/Support/raw_os_ostream.h" | ||
30 | #include "llvm/Transforms/IPO.h" | ||
31 | #include "llvm/Transforms/Scalar.h" | ||
32 | #include <llvm/CodeGen/Passes.h> | ||
33 | |||
34 | using namespace llvm; | ||
35 | |||
36 | extern "C" void LLVMInitializeAMDGPUTarget() { | ||
37 | // Register the target | ||
38 | RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); | ||
39 | } | ||
40 | |||
41 | AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, | ||
42 | StringRef CPU, StringRef FS, | ||
43 | TargetOptions Options, | ||
44 | Reloc::Model RM, CodeModel::Model CM, | ||
45 | CodeGenOpt::Level OptLevel | ||
46 | ) | ||
47 | : | ||
48 | LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), | ||
49 | Subtarget(TT, CPU, FS), | ||
50 | DataLayout(Subtarget.getDataLayout()), | ||
51 | FrameLowering(TargetFrameLowering::StackGrowsUp, | ||
52 | Subtarget.device()->getStackAlignment(), 0), | ||
53 | IntrinsicInfo(this), | ||
54 | InstrItins(&Subtarget.getInstrItineraryData()), | ||
55 | mDump(false) | ||
56 | |||
57 | { | ||
58 | // TLInfo uses InstrInfo so it must be initialized after. | ||
59 | if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { | ||
60 | InstrInfo = new R600InstrInfo(*this); | ||
61 | TLInfo = new R600TargetLowering(*this); | ||
62 | } else { | ||
63 | InstrInfo = new SIInstrInfo(*this); | ||
64 | TLInfo = new SITargetLowering(*this); | ||
65 | } | ||
66 | } | ||
67 | |||
68 | AMDGPUTargetMachine::~AMDGPUTargetMachine() | ||
69 | { | ||
70 | } | ||
71 | |||
72 | namespace { | ||
73 | class AMDGPUPassConfig : public TargetPassConfig { | ||
74 | public: | ||
75 | AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) | ||
76 | : TargetPassConfig(TM, PM) {} | ||
77 | |||
78 | AMDGPUTargetMachine &getAMDGPUTargetMachine() const { | ||
79 | return getTM<AMDGPUTargetMachine>(); | ||
80 | } | ||
81 | |||
82 | virtual bool addPreISel(); | ||
83 | virtual bool addInstSelector(); | ||
84 | virtual bool addPreRegAlloc(); | ||
85 | virtual bool addPostRegAlloc(); | ||
86 | virtual bool addPreSched2(); | ||
87 | virtual bool addPreEmitPass(); | ||
88 | }; | ||
89 | } // End of anonymous namespace | ||
90 | |||
91 | TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { | ||
92 | return new AMDGPUPassConfig(this, PM); | ||
93 | } | ||
94 | |||
95 | bool | ||
96 | AMDGPUPassConfig::addPreISel() | ||
97 | { | ||
98 | return false; | ||
99 | } | ||
100 | |||
101 | bool AMDGPUPassConfig::addInstSelector() { | ||
102 | PM->add(createAMDGPUPeepholeOpt(*TM)); | ||
103 | PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine())); | ||
104 | return false; | ||
105 | } | ||
106 | |||
107 | bool AMDGPUPassConfig::addPreRegAlloc() { | ||
108 | const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); | ||
109 | |||
110 | if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { | ||
111 | PM->add(createSIAssignInterpRegsPass(*TM)); | ||
112 | } | ||
113 | PM->add(createAMDGPUConvertToISAPass(*TM)); | ||
114 | return false; | ||
115 | } | ||
116 | |||
117 | bool AMDGPUPassConfig::addPostRegAlloc() { | ||
118 | return false; | ||
119 | } | ||
120 | |||
121 | bool AMDGPUPassConfig::addPreSched2() { | ||
122 | |||
123 | addPass(IfConverterID); | ||
124 | return false; | ||
125 | } | ||
126 | |||
127 | bool AMDGPUPassConfig::addPreEmitPass() { | ||
128 | PM->add(createAMDGPUCFGPreparationPass(*TM)); | ||
129 | PM->add(createAMDGPUCFGStructurizerPass(*TM)); | ||
130 | |||
131 | const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); | ||
132 | if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { | ||
133 | PM->add(createR600ExpandSpecialInstrsPass(*TM)); | ||
134 | addPass(FinalizeMachineBundlesID); | ||
135 | } else { | ||
136 | PM->add(createSILowerLiteralConstantsPass(*TM)); | ||
137 | // piglit is unreliable (VM protection faults, GPU lockups) with this pass: | ||
138 | //PM->add(createSILowerFlowControlPass(*TM)); | ||
139 | } | ||
140 | |||
141 | return false; | ||
142 | } | ||
143 | |||
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h deleted file mode 100644 index 8b405a882cc..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | //===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The AMDGPU TargetMachine interface definition for hw codgen targets. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef AMDGPU_TARGET_MACHINE_H | ||
15 | #define AMDGPU_TARGET_MACHINE_H | ||
16 | |||
17 | #include "AMDGPUInstrInfo.h" | ||
18 | #include "AMDGPUSubtarget.h" | ||
19 | #include "AMDILFrameLowering.h" | ||
20 | #include "AMDILIntrinsicInfo.h" | ||
21 | #include "R600ISelLowering.h" | ||
22 | #include "llvm/ADT/OwningPtr.h" | ||
23 | #include "llvm/Target/TargetData.h" | ||
24 | |||
25 | namespace llvm { | ||
26 | |||
27 | MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); | ||
28 | |||
29 | class AMDGPUTargetMachine : public LLVMTargetMachine { | ||
30 | |||
31 | AMDGPUSubtarget Subtarget; | ||
32 | const TargetData DataLayout; | ||
33 | AMDGPUFrameLowering FrameLowering; | ||
34 | AMDGPUIntrinsicInfo IntrinsicInfo; | ||
35 | const AMDGPUInstrInfo * InstrInfo; | ||
36 | AMDGPUTargetLowering * TLInfo; | ||
37 | const InstrItineraryData* InstrItins; | ||
38 | bool mDump; | ||
39 | |||
40 | public: | ||
41 | AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS, | ||
42 | StringRef CPU, | ||
43 | TargetOptions Options, | ||
44 | Reloc::Model RM, CodeModel::Model CM, | ||
45 | CodeGenOpt::Level OL); | ||
46 | ~AMDGPUTargetMachine(); | ||
47 | virtual const AMDGPUFrameLowering* getFrameLowering() const { | ||
48 | return &FrameLowering; | ||
49 | } | ||
50 | virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const { | ||
51 | return &IntrinsicInfo; | ||
52 | } | ||
53 | virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} | ||
54 | virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; } | ||
55 | virtual const AMDGPURegisterInfo *getRegisterInfo() const { | ||
56 | return &InstrInfo->getRegisterInfo(); | ||
57 | } | ||
58 | virtual AMDGPUTargetLowering * getTargetLowering() const { | ||
59 | return TLInfo; | ||
60 | } | ||
61 | virtual const InstrItineraryData* getInstrItineraryData() const { | ||
62 | return InstrItins; | ||
63 | } | ||
64 | virtual const TargetData* getTargetData() const { return &DataLayout; } | ||
65 | virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); | ||
66 | }; | ||
67 | |||
68 | } // End namespace llvm | ||
69 | |||
70 | #endif // AMDGPU_TARGET_MACHINE_H | ||
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h deleted file mode 100644 index e96b123bb7d..00000000000 --- a/src/gallium/drivers/radeon/AMDIL.h +++ /dev/null | |||
@@ -1,106 +0,0 @@ | |||
1 | //===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the entry points for global functions defined in the LLVM | ||
11 | // AMDGPU back-end. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #ifndef AMDIL_H_ | ||
16 | #define AMDIL_H_ | ||
17 | |||
18 | #include "llvm/CodeGen/MachineFunction.h" | ||
19 | #include "llvm/Target/TargetMachine.h" | ||
20 | |||
21 | #define ARENA_SEGMENT_RESERVED_UAVS 12 | ||
22 | #define DEFAULT_ARENA_UAV_ID 8 | ||
23 | #define DEFAULT_RAW_UAV_ID 7 | ||
24 | #define GLOBAL_RETURN_RAW_UAV_ID 11 | ||
25 | #define HW_MAX_NUM_CB 8 | ||
26 | #define MAX_NUM_UNIQUE_UAVS 8 | ||
27 | #define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8 | ||
28 | #define OPENCL_MAX_READ_IMAGES 128 | ||
29 | #define OPENCL_MAX_WRITE_IMAGES 8 | ||
30 | #define OPENCL_MAX_SAMPLERS 16 | ||
31 | |||
32 | // The next two values can never be zero, as zero is the ID that is | ||
33 | // used to assert against. | ||
34 | #define DEFAULT_LDS_ID 1 | ||
35 | #define DEFAULT_GDS_ID 1 | ||
36 | #define DEFAULT_SCRATCH_ID 1 | ||
37 | #define DEFAULT_VEC_SLOTS 8 | ||
38 | |||
39 | #define OCL_DEVICE_RV710 0x0001 | ||
40 | #define OCL_DEVICE_RV730 0x0002 | ||
41 | #define OCL_DEVICE_RV770 0x0004 | ||
42 | #define OCL_DEVICE_CEDAR 0x0008 | ||
43 | #define OCL_DEVICE_REDWOOD 0x0010 | ||
44 | #define OCL_DEVICE_JUNIPER 0x0020 | ||
45 | #define OCL_DEVICE_CYPRESS 0x0040 | ||
46 | #define OCL_DEVICE_CAICOS 0x0080 | ||
47 | #define OCL_DEVICE_TURKS 0x0100 | ||
48 | #define OCL_DEVICE_BARTS 0x0200 | ||
49 | #define OCL_DEVICE_CAYMAN 0x0400 | ||
50 | #define OCL_DEVICE_ALL 0x3FFF | ||
51 | |||
52 | /// The number of function ID's that are reserved for | ||
53 | /// internal compiler usage. | ||
54 | const unsigned int RESERVED_FUNCS = 1024; | ||
55 | |||
56 | namespace llvm { | ||
57 | class AMDGPUInstrPrinter; | ||
58 | class FunctionPass; | ||
59 | class MCAsmInfo; | ||
60 | class raw_ostream; | ||
61 | class Target; | ||
62 | class TargetMachine; | ||
63 | |||
64 | /// Instruction selection passes. | ||
65 | FunctionPass* | ||
66 | createAMDGPUISelDag(TargetMachine &TM); | ||
67 | FunctionPass* | ||
68 | createAMDGPUPeepholeOpt(TargetMachine &TM); | ||
69 | |||
70 | /// Pre emit passes. | ||
71 | FunctionPass* | ||
72 | createAMDGPUCFGPreparationPass(TargetMachine &TM); | ||
73 | FunctionPass* | ||
74 | createAMDGPUCFGStructurizerPass(TargetMachine &TM); | ||
75 | |||
76 | extern Target TheAMDGPUTarget; | ||
77 | } // end namespace llvm; | ||
78 | |||
79 | /// Include device information enumerations | ||
80 | #include "AMDILDeviceInfo.h" | ||
81 | |||
82 | namespace llvm { | ||
83 | /// OpenCL uses address spaces to differentiate between | ||
84 | /// various memory regions on the hardware. On the CPU | ||
85 | /// all of the address spaces point to the same memory, | ||
86 | /// however on the GPU, each address space points to | ||
87 | /// a seperate piece of memory that is unique from other | ||
88 | /// memory locations. | ||
89 | namespace AMDGPUAS { | ||
90 | enum AddressSpaces { | ||
91 | PRIVATE_ADDRESS = 0, // Address space for private memory. | ||
92 | GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). | ||
93 | CONSTANT_ADDRESS = 2, // Address space for constant memory. | ||
94 | LOCAL_ADDRESS = 3, // Address space for local memory. | ||
95 | REGION_ADDRESS = 4, // Address space for region memory. | ||
96 | ADDRESS_NONE = 5, // Address space for unknown memory. | ||
97 | PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) | ||
98 | PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) | ||
99 | USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI | ||
100 | LAST_ADDRESS = 9 | ||
101 | }; | ||
102 | |||
103 | } // namespace AMDGPUAS | ||
104 | |||
105 | } // end namespace llvm | ||
106 | #endif // AMDIL_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp deleted file mode 100644 index 8561f0b3175..00000000000 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp +++ /dev/null | |||
@@ -1,129 +0,0 @@ | |||
1 | //===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #include "AMDIL7XXDevice.h" | ||
10 | #include "AMDGPUSubtarget.h" | ||
11 | #include "AMDILDevice.h" | ||
12 | |||
13 | using namespace llvm; | ||
14 | |||
15 | AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) | ||
16 | { | ||
17 | setCaps(); | ||
18 | std::string name = mSTM->getDeviceName(); | ||
19 | if (name == "rv710") { | ||
20 | mDeviceFlag = OCL_DEVICE_RV710; | ||
21 | } else if (name == "rv730") { | ||
22 | mDeviceFlag = OCL_DEVICE_RV730; | ||
23 | } else { | ||
24 | mDeviceFlag = OCL_DEVICE_RV770; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | AMDGPU7XXDevice::~AMDGPU7XXDevice() | ||
29 | { | ||
30 | } | ||
31 | |||
32 | void AMDGPU7XXDevice::setCaps() | ||
33 | { | ||
34 | mSWBits.set(AMDGPUDeviceInfo::LocalMem); | ||
35 | } | ||
36 | |||
37 | size_t AMDGPU7XXDevice::getMaxLDSSize() const | ||
38 | { | ||
39 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
40 | return MAX_LDS_SIZE_700; | ||
41 | } | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | size_t AMDGPU7XXDevice::getWavefrontSize() const | ||
46 | { | ||
47 | return AMDGPUDevice::HalfWavefrontSize; | ||
48 | } | ||
49 | |||
50 | uint32_t AMDGPU7XXDevice::getGeneration() const | ||
51 | { | ||
52 | return AMDGPUDeviceInfo::HD4XXX; | ||
53 | } | ||
54 | |||
55 | uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const | ||
56 | { | ||
57 | switch (DeviceID) { | ||
58 | default: | ||
59 | assert(0 && "ID type passed in is unknown!"); | ||
60 | break; | ||
61 | case GLOBAL_ID: | ||
62 | case CONSTANT_ID: | ||
63 | case RAW_UAV_ID: | ||
64 | case ARENA_UAV_ID: | ||
65 | break; | ||
66 | case LDS_ID: | ||
67 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
68 | return DEFAULT_LDS_ID; | ||
69 | } | ||
70 | break; | ||
71 | case SCRATCH_ID: | ||
72 | if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { | ||
73 | return DEFAULT_SCRATCH_ID; | ||
74 | } | ||
75 | break; | ||
76 | case GDS_ID: | ||
77 | assert(0 && "GDS UAV ID is not supported on this chip"); | ||
78 | if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { | ||
79 | return DEFAULT_GDS_ID; | ||
80 | } | ||
81 | break; | ||
82 | }; | ||
83 | |||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const | ||
88 | { | ||
89 | return 1; | ||
90 | } | ||
91 | |||
92 | AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) | ||
93 | { | ||
94 | setCaps(); | ||
95 | } | ||
96 | |||
97 | AMDGPU770Device::~AMDGPU770Device() | ||
98 | { | ||
99 | } | ||
100 | |||
101 | void AMDGPU770Device::setCaps() | ||
102 | { | ||
103 | if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { | ||
104 | mSWBits.set(AMDGPUDeviceInfo::FMA); | ||
105 | mHWBits.set(AMDGPUDeviceInfo::DoubleOps); | ||
106 | } | ||
107 | mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); | ||
108 | mHWBits.reset(AMDGPUDeviceInfo::LongOps); | ||
109 | mSWBits.set(AMDGPUDeviceInfo::LongOps); | ||
110 | mSWBits.set(AMDGPUDeviceInfo::LocalMem); | ||
111 | } | ||
112 | |||
113 | size_t AMDGPU770Device::getWavefrontSize() const | ||
114 | { | ||
115 | return AMDGPUDevice::WavefrontSize; | ||
116 | } | ||
117 | |||
118 | AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) | ||
119 | { | ||
120 | } | ||
121 | |||
122 | AMDGPU710Device::~AMDGPU710Device() | ||
123 | { | ||
124 | } | ||
125 | |||
126 | size_t AMDGPU710Device::getWavefrontSize() const | ||
127 | { | ||
128 | return AMDGPUDevice::QuarterWavefrontSize; | ||
129 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h deleted file mode 100644 index e848e2e0f2c..00000000000 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.h +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | //==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the subtarget data classes. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | // This file will define the interface that each generation needs to | ||
14 | // implement in order to correctly answer queries on the capabilities of the | ||
15 | // specific hardware. | ||
16 | //===----------------------------------------------------------------------===// | ||
17 | #ifndef _AMDIL7XXDEVICEIMPL_H_ | ||
18 | #define _AMDIL7XXDEVICEIMPL_H_ | ||
19 | #include "AMDILDevice.h" | ||
20 | |||
21 | namespace llvm { | ||
22 | class AMDGPUSubtarget; | ||
23 | |||
24 | //===----------------------------------------------------------------------===// | ||
25 | // 7XX generation of devices and their respective sub classes | ||
26 | //===----------------------------------------------------------------------===// | ||
27 | |||
28 | // The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX | ||
29 | // devices are derived from this class. The AMDGPU7XX device will only | ||
30 | // support the minimal features that are required to be considered OpenCL 1.0 | ||
31 | // compliant and nothing more. | ||
32 | class AMDGPU7XXDevice : public AMDGPUDevice { | ||
33 | public: | ||
34 | AMDGPU7XXDevice(AMDGPUSubtarget *ST); | ||
35 | virtual ~AMDGPU7XXDevice(); | ||
36 | virtual size_t getMaxLDSSize() const; | ||
37 | virtual size_t getWavefrontSize() const; | ||
38 | virtual uint32_t getGeneration() const; | ||
39 | virtual uint32_t getResourceID(uint32_t DeviceID) const; | ||
40 | virtual uint32_t getMaxNumUAVs() const; | ||
41 | |||
42 | protected: | ||
43 | virtual void setCaps(); | ||
44 | }; // AMDGPU7XXDevice | ||
45 | |||
46 | // The AMDGPU770Device class represents the RV770 chip and it's | ||
47 | // derivative cards. The difference between this device and the base | ||
48 | // class is this device device adds support for double precision | ||
49 | // and has a larger wavefront size. | ||
50 | class AMDGPU770Device : public AMDGPU7XXDevice { | ||
51 | public: | ||
52 | AMDGPU770Device(AMDGPUSubtarget *ST); | ||
53 | virtual ~AMDGPU770Device(); | ||
54 | virtual size_t getWavefrontSize() const; | ||
55 | private: | ||
56 | virtual void setCaps(); | ||
57 | }; // AMDGPU770Device | ||
58 | |||
59 | // The AMDGPU710Device class derives from the 7XX base class, but this | ||
60 | // class is a smaller derivative, so we need to overload some of the | ||
61 | // functions in order to correctly specify this information. | ||
62 | class AMDGPU710Device : public AMDGPU7XXDevice { | ||
63 | public: | ||
64 | AMDGPU710Device(AMDGPUSubtarget *ST); | ||
65 | virtual ~AMDGPU710Device(); | ||
66 | virtual size_t getWavefrontSize() const; | ||
67 | }; // AMDGPU710Device | ||
68 | |||
69 | } // namespace llvm | ||
70 | #endif // _AMDILDEVICEIMPL_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td deleted file mode 100644 index ffe9ce2c532..00000000000 --- a/src/gallium/drivers/radeon/AMDILBase.td +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | //===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // Target-independent interfaces which we are implementing | ||
10 | //===----------------------------------------------------------------------===// | ||
11 | |||
12 | include "llvm/Target/Target.td" | ||
13 | |||
14 | // Dummy Instruction itineraries for pseudo instructions | ||
15 | def ALU_NULL : FuncUnit; | ||
16 | def NullALU : InstrItinClass; | ||
17 | |||
18 | //===----------------------------------------------------------------------===// | ||
19 | // AMDIL Subtarget features. | ||
20 | //===----------------------------------------------------------------------===// | ||
21 | def FeatureFP64 : SubtargetFeature<"fp64", | ||
22 | "CapsOverride[AMDGPUDeviceInfo::DoubleOps]", | ||
23 | "true", | ||
24 | "Enable 64bit double precision operations">; | ||
25 | def FeatureByteAddress : SubtargetFeature<"byte_addressable_store", | ||
26 | "CapsOverride[AMDGPUDeviceInfo::ByteStores]", | ||
27 | "true", | ||
28 | "Enable byte addressable stores">; | ||
29 | def FeatureBarrierDetect : SubtargetFeature<"barrier_detect", | ||
30 | "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]", | ||
31 | "true", | ||
32 | "Enable duplicate barrier detection(HD5XXX or later).">; | ||
33 | def FeatureImages : SubtargetFeature<"images", | ||
34 | "CapsOverride[AMDGPUDeviceInfo::Images]", | ||
35 | "true", | ||
36 | "Enable image functions">; | ||
37 | def FeatureMultiUAV : SubtargetFeature<"multi_uav", | ||
38 | "CapsOverride[AMDGPUDeviceInfo::MultiUAV]", | ||
39 | "true", | ||
40 | "Generate multiple UAV code(HD5XXX family or later)">; | ||
41 | def FeatureMacroDB : SubtargetFeature<"macrodb", | ||
42 | "CapsOverride[AMDGPUDeviceInfo::MacroDB]", | ||
43 | "true", | ||
44 | "Use internal macrodb, instead of macrodb in driver">; | ||
45 | def FeatureNoAlias : SubtargetFeature<"noalias", | ||
46 | "CapsOverride[AMDGPUDeviceInfo::NoAlias]", | ||
47 | "true", | ||
48 | "assert that all kernel argument pointers are not aliased">; | ||
49 | def FeatureNoInline : SubtargetFeature<"no-inline", | ||
50 | "CapsOverride[AMDGPUDeviceInfo::NoInline]", | ||
51 | "true", | ||
52 | "specify whether to not inline functions">; | ||
53 | |||
54 | def Feature64BitPtr : SubtargetFeature<"64BitPtr", | ||
55 | "mIs64bit", | ||
56 | "false", | ||
57 | "Specify if 64bit addressing should be used.">; | ||
58 | |||
59 | def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", | ||
60 | "mIs32on64bit", | ||
61 | "false", | ||
62 | "Specify if 64bit sized pointers with 32bit addressing should be used.">; | ||
63 | def FeatureDebug : SubtargetFeature<"debug", | ||
64 | "CapsOverride[AMDGPUDeviceInfo::Debug]", | ||
65 | "true", | ||
66 | "Debug mode is enabled, so disable hardware accelerated address spaces.">; | ||
67 | def FeatureDumpCode : SubtargetFeature <"DumpCode", | ||
68 | "mDumpCode", | ||
69 | "true", | ||
70 | "Dump MachineInstrs in the CodeEmitter">; | ||
71 | |||
72 | def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", | ||
73 | "mR600ALUInst", | ||
74 | "false", | ||
75 | "Older version of ALU instructions encoding.">; | ||
76 | |||
77 | |||
78 | //===----------------------------------------------------------------------===// | ||
79 | // Register File, Calling Conv, Instruction Descriptions | ||
80 | //===----------------------------------------------------------------------===// | ||
81 | |||
82 | |||
83 | include "AMDILRegisterInfo.td" | ||
84 | include "AMDILInstrInfo.td" | ||
85 | |||
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp deleted file mode 100644 index 20e27ef1132..00000000000 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ /dev/null | |||
@@ -1,3274 +0,0 @@ | |||
1 | //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | |||
10 | #define DEBUGME 0 | ||
11 | #define DEBUG_TYPE "structcfg" | ||
12 | |||
13 | #include "AMDGPUInstrInfo.h" | ||
14 | #include "AMDIL.h" | ||
15 | #include "AMDILUtilityFunctions.h" | ||
16 | #include "llvm/ADT/SCCIterator.h" | ||
17 | #include "llvm/ADT/SmallVector.h" | ||
18 | #include "llvm/ADT/Statistic.h" | ||
19 | #include "llvm/Analysis/DominatorInternals.h" | ||
20 | #include "llvm/Analysis/Dominators.h" | ||
21 | #include "llvm/CodeGen/MachineDominators.h" | ||
22 | #include "llvm/CodeGen/MachineDominators.h" | ||
23 | #include "llvm/CodeGen/MachineFunction.h" | ||
24 | #include "llvm/CodeGen/MachineFunctionAnalysis.h" | ||
25 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
26 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
28 | #include "llvm/CodeGen/MachineJumpTableInfo.h" | ||
29 | #include "llvm/CodeGen/MachineLoopInfo.h" | ||
30 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
31 | #include "llvm/Target/TargetInstrInfo.h" | ||
32 | |||
33 | #define FirstNonDebugInstr(A) A->begin() | ||
34 | using namespace llvm; | ||
35 | |||
36 | // TODO: move-begin. | ||
37 | |||
38 | //===----------------------------------------------------------------------===// | ||
39 | // | ||
40 | // Statistics for CFGStructurizer. | ||
41 | // | ||
42 | //===----------------------------------------------------------------------===// | ||
43 | |||
44 | STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " | ||
45 | "matched"); | ||
46 | STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " | ||
47 | "matched"); | ||
48 | STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " | ||
49 | "pattern matched"); | ||
50 | STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " | ||
51 | "pattern matched"); | ||
52 | STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " | ||
53 | "matched"); | ||
54 | STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); | ||
55 | STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); | ||
56 | |||
57 | //===----------------------------------------------------------------------===// | ||
58 | // | ||
59 | // Miscellaneous utility for CFGStructurizer. | ||
60 | // | ||
61 | //===----------------------------------------------------------------------===// | ||
62 | namespace llvmCFGStruct | ||
63 | { | ||
64 | #define SHOWNEWINSTR(i) \ | ||
65 | if (DEBUGME) errs() << "New instr: " << *i << "\n" | ||
66 | |||
67 | #define SHOWNEWBLK(b, msg) \ | ||
68 | if (DEBUGME) { \ | ||
69 | errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ | ||
70 | errs() << "\n"; \ | ||
71 | } | ||
72 | |||
73 | #define SHOWBLK_DETAIL(b, msg) \ | ||
74 | if (DEBUGME) { \ | ||
75 | if (b) { \ | ||
76 | errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ | ||
77 | b->print(errs()); \ | ||
78 | errs() << "\n"; \ | ||
79 | } \ | ||
80 | } | ||
81 | |||
82 | #define INVALIDSCCNUM -1 | ||
83 | #define INVALIDREGNUM 0 | ||
84 | |||
85 | template<class LoopinfoT> | ||
86 | void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { | ||
87 | for (typename LoopinfoT::iterator iter = LoopInfo.begin(), | ||
88 | iterEnd = LoopInfo.end(); | ||
89 | iter != iterEnd; ++iter) { | ||
90 | (*iter)->print(OS, 0); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | template<class NodeT> | ||
95 | void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { | ||
96 | size_t sz = Src.size(); | ||
97 | for (size_t i = 0; i < sz/2; ++i) { | ||
98 | NodeT *t = Src[i]; | ||
99 | Src[i] = Src[sz - i - 1]; | ||
100 | Src[sz - i - 1] = t; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | } //end namespace llvmCFGStruct | ||
105 | |||
106 | |||
107 | //===----------------------------------------------------------------------===// | ||
108 | // | ||
109 | // MachinePostDominatorTree | ||
110 | // | ||
111 | //===----------------------------------------------------------------------===// | ||
112 | |||
113 | namespace llvm { | ||
114 | |||
115 | /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used | ||
116 | /// to compute the a post-dominator tree. | ||
117 | /// | ||
118 | struct MachinePostDominatorTree : public MachineFunctionPass { | ||
119 | static char ID; // Pass identification, replacement for typeid | ||
120 | DominatorTreeBase<MachineBasicBlock> *DT; | ||
121 | MachinePostDominatorTree() : MachineFunctionPass(ID) | ||
122 | { | ||
123 | DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate | ||
124 | // postdominator | ||
125 | } | ||
126 | |||
127 | ~MachinePostDominatorTree(); | ||
128 | |||
129 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
130 | |||
131 | virtual void getAnalysisUsage(AnalysisUsage &AU) const { | ||
132 | AU.setPreservesAll(); | ||
133 | MachineFunctionPass::getAnalysisUsage(AU); | ||
134 | } | ||
135 | |||
136 | inline const std::vector<MachineBasicBlock *> &getRoots() const { | ||
137 | return DT->getRoots(); | ||
138 | } | ||
139 | |||
140 | inline MachineDomTreeNode *getRootNode() const { | ||
141 | return DT->getRootNode(); | ||
142 | } | ||
143 | |||
144 | inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { | ||
145 | return DT->getNode(BB); | ||
146 | } | ||
147 | |||
148 | inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { | ||
149 | return DT->getNode(BB); | ||
150 | } | ||
151 | |||
152 | inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { | ||
153 | return DT->dominates(A, B); | ||
154 | } | ||
155 | |||
156 | inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { | ||
157 | return DT->dominates(A, B); | ||
158 | } | ||
159 | |||
160 | inline bool | ||
161 | properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { | ||
162 | return DT->properlyDominates(A, B); | ||
163 | } | ||
164 | |||
165 | inline bool | ||
166 | properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { | ||
167 | return DT->properlyDominates(A, B); | ||
168 | } | ||
169 | |||
170 | inline MachineBasicBlock * | ||
171 | findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { | ||
172 | return DT->findNearestCommonDominator(A, B); | ||
173 | } | ||
174 | |||
175 | virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { | ||
176 | DT->print(OS); | ||
177 | } | ||
178 | }; | ||
179 | } //end of namespace llvm | ||
180 | |||
181 | char MachinePostDominatorTree::ID = 0; | ||
182 | static RegisterPass<MachinePostDominatorTree> | ||
183 | machinePostDominatorTreePass("machinepostdomtree", | ||
184 | "MachinePostDominator Tree Construction", | ||
185 | true, true); | ||
186 | |||
187 | //const PassInfo *const llvm::MachinePostDominatorsID | ||
188 | //= &machinePostDominatorTreePass; | ||
189 | |||
190 | bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { | ||
191 | DT->recalculate(F); | ||
192 | //DEBUG(DT->dump()); | ||
193 | return false; | ||
194 | } | ||
195 | |||
196 | MachinePostDominatorTree::~MachinePostDominatorTree() { | ||
197 | delete DT; | ||
198 | } | ||
199 | |||
200 | //===----------------------------------------------------------------------===// | ||
201 | // | ||
202 | // supporting data structure for CFGStructurizer | ||
203 | // | ||
204 | //===----------------------------------------------------------------------===// | ||
205 | |||
206 | namespace llvmCFGStruct | ||
207 | { | ||
208 | template<class PassT> | ||
209 | struct CFGStructTraits { | ||
210 | }; | ||
211 | |||
212 | template <class InstrT> | ||
213 | class BlockInformation { | ||
214 | public: | ||
215 | bool isRetired; | ||
216 | int sccNum; | ||
217 | //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; | ||
218 | //Instructions defining the corresponding successor. | ||
219 | BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} | ||
220 | }; | ||
221 | |||
222 | template <class BlockT, class InstrT, class RegiT> | ||
223 | class LandInformation { | ||
224 | public: | ||
225 | BlockT *landBlk; | ||
226 | std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before | ||
227 | //WHILELOOP(thisloop) init before entering | ||
228 | //thisloop. | ||
229 | std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after | ||
230 | //WHILELOOP(thisloop) init after entering | ||
231 | //thisloop. | ||
232 | std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop | ||
233 | //land block, branch cond on this reg. | ||
234 | std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break | ||
235 | //endif" after ENDLOOP(thisloop) break | ||
236 | //outerLoopOf(thisLoop). | ||
237 | std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue | ||
238 | //endif" after ENDLOOP(thisloop) continue on | ||
239 | //outerLoopOf(thisLoop). | ||
240 | LandInformation() : landBlk(NULL) {} | ||
241 | }; | ||
242 | |||
243 | } //end of namespace llvmCFGStruct | ||
244 | |||
245 | //===----------------------------------------------------------------------===// | ||
246 | // | ||
247 | // CFGStructurizer | ||
248 | // | ||
249 | //===----------------------------------------------------------------------===// | ||
250 | |||
251 | namespace llvmCFGStruct | ||
252 | { | ||
253 | // bixia TODO: port it to BasicBlock, not just MachineBasicBlock. | ||
254 | template<class PassT> | ||
255 | class CFGStructurizer | ||
256 | { | ||
257 | public: | ||
258 | typedef enum { | ||
259 | Not_SinglePath = 0, | ||
260 | SinglePath_InPath = 1, | ||
261 | SinglePath_NotInPath = 2 | ||
262 | } PathToKind; | ||
263 | |||
264 | public: | ||
265 | typedef typename PassT::InstructionType InstrT; | ||
266 | typedef typename PassT::FunctionType FuncT; | ||
267 | typedef typename PassT::DominatortreeType DomTreeT; | ||
268 | typedef typename PassT::PostDominatortreeType PostDomTreeT; | ||
269 | typedef typename PassT::DomTreeNodeType DomTreeNodeT; | ||
270 | typedef typename PassT::LoopinfoType LoopInfoT; | ||
271 | |||
272 | typedef GraphTraits<FuncT *> FuncGTraits; | ||
273 | //typedef FuncGTraits::nodes_iterator BlockIterator; | ||
274 | typedef typename FuncT::iterator BlockIterator; | ||
275 | |||
276 | typedef typename FuncGTraits::NodeType BlockT; | ||
277 | typedef GraphTraits<BlockT *> BlockGTraits; | ||
278 | typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; | ||
279 | //typedef BlockGTraits::succ_iterator InstructionIterator; | ||
280 | typedef typename BlockT::iterator InstrIterator; | ||
281 | |||
282 | typedef CFGStructTraits<PassT> CFGTraits; | ||
283 | typedef BlockInformation<InstrT> BlockInfo; | ||
284 | typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; | ||
285 | |||
286 | typedef int RegiT; | ||
287 | typedef typename PassT::LoopType LoopT; | ||
288 | typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; | ||
289 | typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; | ||
290 | //landing info for loop break | ||
291 | typedef SmallVector<BlockT *, 32> BlockTSmallerVector; | ||
292 | |||
293 | public: | ||
294 | CFGStructurizer(); | ||
295 | ~CFGStructurizer(); | ||
296 | |||
297 | /// Perform the CFG structurization | ||
298 | bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); | ||
299 | |||
300 | /// Perform the CFG preparation | ||
301 | bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); | ||
302 | |||
303 | private: | ||
304 | void reversePredicateSetter(typename BlockT::iterator); | ||
305 | void orderBlocks(); | ||
306 | void printOrderedBlocks(llvm::raw_ostream &OS); | ||
307 | int patternMatch(BlockT *CurBlock); | ||
308 | int patternMatchGroup(BlockT *CurBlock); | ||
309 | |||
310 | int serialPatternMatch(BlockT *CurBlock); | ||
311 | int ifPatternMatch(BlockT *CurBlock); | ||
312 | int switchPatternMatch(BlockT *CurBlock); | ||
313 | int loopendPatternMatch(BlockT *CurBlock); | ||
314 | int loopPatternMatch(BlockT *CurBlock); | ||
315 | |||
316 | int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); | ||
317 | int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); | ||
318 | //int loopWithoutBreak(BlockT *); | ||
319 | |||
320 | void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, | ||
321 | BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); | ||
322 | void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, | ||
323 | BlockT *ContBlock, LoopT *contLoop); | ||
324 | bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); | ||
325 | int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, | ||
326 | BlockT *FalseBlock); | ||
327 | int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, | ||
328 | BlockT *FalseBlock); | ||
329 | int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, | ||
330 | BlockT *FalseBlock, BlockT **LandBlockPtr); | ||
331 | void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, | ||
332 | BlockT *FalseBlock, BlockT *LandBlock, | ||
333 | bool Detail = false); | ||
334 | PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, | ||
335 | bool AllowSideEntry = true); | ||
336 | BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, | ||
337 | bool AllowSideEntry = true); | ||
338 | int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); | ||
  // --- Structural merge/transform helpers (definitions below) -----------
  // Collapse a straight-line pair: append srcBlock's contents to DstBlock.
  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);

  // Collapse an if-then-else region rooted at CurBlock (branch BranchInstr,
  // arms TrueBlock/FalseBlock, join LandBlock) into structured form.
  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
                            BlockT *TrueBlock, BlockT *FalseBlock,
                            BlockT *LandBlock);
  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
                           BlockT *ExitLandBlock, RegiT SetReg);
  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
                           RegiT SetReg);
  // Used by loopbreakPatternMatch when the exit landing block coincides
  // with the parent loop's header; may return NULL on failure.
  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
                                std::set<BlockT*> &ExitBlockSet,
                                BlockT *ExitLandBlk);
  // Fallback for multi-exit loops; returns the new landing block or NULL.
  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
                                BlockTSmallerVector &ExitingBlocks,
                                BlockTSmallerVector &ExitBlocks);
  // Gives a loop with no exiting blocks a dummy exit; may return NULL.
  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
  void removeUnconditionalBranch(BlockT *SrcBlock);
  void removeRedundantConditionalBranch(BlockT *SrcBlock);
  // Merges multiple return blocks into one dummy exit (see prepare()).
  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);

  void removeSuccessor(BlockT *SrcBlock);
  // Clones CurBlock so that PredBlock gets a private copy; returns the clone.
  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
  // Maps a loop-exiting block to the successor outside the loop.
  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);

  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
                          InstrIterator InsertPos);

  // --- Per-block bookkeeping (blockInfoMap) ------------------------------
  void recordSccnum(BlockT *SrcBlock, int SCCNum);
  // Returns the recorded SCC number; INVALIDSCCNUM if never recorded.
  int getSCCNum(BlockT *srcBlk);

  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
  bool isRetiredBlock(BlockT *SrcBlock);
  bool isActiveLoophead(BlockT *CurBlock);
  bool needMigrateBlock(BlockT *Block);

  // --- Loop landing-block bookkeeping (loopLandInfoMap) ------------------
  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
                              BlockTSmallerVector &exitBlocks,
                              std::set<BlockT*> &ExitBlockSet);
  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
  BlockT *getLoopLandBlock(LoopT *LoopRep);
  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);

  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);

  // --- Queries ------------------------------------------------------------
  bool hasBackEdge(BlockT *curBlock);
  unsigned getLoopDepth (LoopT *LoopRep);
  // Counts blocks in [IterStart, IterEnd) still active (presumably the
  // not-yet-retired ones — see uses in run(); TODO confirm).
  int countActiveBlock(
    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
  BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);

private:
  // Analysis results cached by prepare()/run(); not owned.
  DomTreeT *domTree;
  PostDomTreeT *postDomTree;
  LoopInfoT *loopInfo;
  PassT *passRep;
  FuncT *funcRep;

  BlockInfoMap blockInfoMap;       // owns its values (deleted in dtor/run)
  LoopLandInfoMap loopLandInfoMap; // owns its values (deleted in run)
  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
  const AMDGPURegisterInfo *TRI;   // not owned

}; //template class CFGStructurizer
409 | |||
410 | template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() | ||
411 | : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { | ||
412 | } | ||
413 | |||
414 | template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { | ||
415 | for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), | ||
416 | E = blockInfoMap.end(); I != E; ++I) { | ||
417 | delete I->second; | ||
418 | } | ||
419 | } | ||
420 | |||
// One-time normalization pass run before the main structurizer:
//  * caches pass/function/register-info pointers,
//  * fetches LoopInfo and computes the block ordering,
//  * gives infinite loops (no exiting block) a dummy exit block,
//  * strips unconditional and redundant conditional branches,
//  * merges multiple return blocks into a single dummy exit block.
// Returns true if the CFG was changed.
// NOTE(review): `changed` is only set when a dummy exit block is added;
// normalizeInfiniteLoopExit may also modify the CFG without setting it —
// verify callers don't rely on the return value for that case.
template<class PassT>
bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
                                     const AMDGPURegisterInfo * tri) {
  passRep = &pass;
  funcRep = &func;
  TRI = tri;

  bool changed = false;
  //func.RenumberBlocks();

  //to do, if not reducible flow graph, make it so ???

  if (DEBUGME) {
    errs() << "AMDGPUCFGStructurizer::prepare\n";
    //func.viewCFG();
    //func.viewCFGOnly();
    //func.dump();
  }

  // Dominator/post-dominator trees are intentionally not fetched here
  // (see the FIXMEs); only LoopInfo is needed by this pass.
  //FIXME: gcc complains on this.
  //domTree = &pass.getAnalysis<DomTreeT>();
  //domTree = CFGTraits::getDominatorTree(pass);
  //if (DEBUGME) {
  //  domTree->print(errs());
  //}

  //FIXME: gcc complains on this.
  //domTree = &pass.getAnalysis<DomTreeT>();
  //postDomTree = CFGTraits::getPostDominatorTree(pass);
  //if (DEBUGME) {
  //  postDomTree->print(errs());
  //}

  //FIXME: gcc complains on this.
  //loopInfo = &pass.getAnalysis<LoopInfoT>();
  loopInfo = CFGTraits::getLoopInfo(pass);
  if (DEBUGME) {
    errs() << "LoopInfo:\n";
    PrintLoopinfo(*loopInfo, errs());
  }

  // Fill orderedBlks (reverse DFS post order / SCC order).
  orderBlocks();
  if (DEBUGME) {
    errs() << "Ordered blocks:\n";
    printOrderedBlocks(errs());
  }

  // Blocks that end the function; used to decide on a dummy exit below.
  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;

  // Give every top-level loop with no exiting block a synthetic exit so
  // later phases can assume each loop can be left.
  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
       iterEnd = loopInfo->end();
       iter != iterEnd; ++iter) {
    LoopT* loopRep = (*iter);
    BlockTSmallerVector exitingBlks;
    loopRep->getExitingBlocks(exitingBlks);

    if (exitingBlks.size() == 0) {
      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
      if (dummyExitBlk != NULL)
        retBlks.push_back(dummyExitBlk);
    }
  }

  // Remove unconditional branch instr.
  // Add dummy exit block iff there are multiple returns.

  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
       iterBlk != iterEndBlk;
       ++iterBlk) {
    BlockT *curBlk = *iterBlk;
    removeUnconditionalBranch(curBlk);
    removeRedundantConditionalBranch(curBlk);
    if (CFGTraits::isReturnBlock(curBlk)) {
      retBlks.push_back(curBlk);
    }
    // After branch cleanup every block must have at most two successors.
    assert(curBlk->succ_size() <= 2);
    //assert(curBlk->size() > 0);
    //removeEmptyBlock(curBlk) ??
  } //for

  // Multiple returns: funnel them into one dummy exit block.
  if (retBlks.size() >= 2) {
    addDummyExitBlock(retBlks);
    changed = true;
  }

  return changed;
} //CFGStructurizer::prepare
509 | |||
// Main structurizing driver.  Repeatedly sweeps the ordered blocks,
// applying patternMatch() to each SCC until either the function is
// reduced to a single block (`finish`) or a full sweep makes no progress.
// On success, retired blocks are erased from the function and all
// bookkeeping maps are released.  Asserts on irreducible control flow
// (!finish).  Always returns true.
template<class PassT>
bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
                                 const AMDGPURegisterInfo * tri) {
  passRep = &pass;
  funcRep = &func;
  TRI = tri;

  //func.RenumberBlocks();

  //Assume reducible CFG...
  if (DEBUGME) {
    errs() << "AMDGPUCFGStructurizer::run\n";
    //errs() << func.getFunction()->getNameStr() << "\n";
    func.viewCFG();
    //func.viewCFGOnly();
    //func.dump();
  }

#if 1
  //FIXME: gcc complains on this.
  //domTree = &pass.getAnalysis<DomTreeT>();
  domTree = CFGTraits::getDominatorTree(pass);
  if (DEBUGME) {
    domTree->print(errs(), (const llvm::Module*)0);
  }
#endif

  //FIXME: gcc complains on this.
  //domTree = &pass.getAnalysis<DomTreeT>();
  postDomTree = CFGTraits::getPostDominatorTree(pass);
  if (DEBUGME) {
    postDomTree->print(errs());
  }

  //FIXME: gcc complains on this.
  //loopInfo = &pass.getAnalysis<LoopInfoT>();
  loopInfo = CFGTraits::getLoopInfo(pass);
  if (DEBUGME) {
    errs() << "LoopInfo:\n";
    PrintLoopinfo(*loopInfo, errs());
  }

  orderBlocks();
//#define STRESSTEST
#ifdef STRESSTEST
  //Use the worse block ordering to test the algorithm.
  ReverseVector(orderedBlks);
#endif

  if (DEBUGME) {
    errs() << "Ordered blocks:\n";
    printOrderedBlocks(errs());
  }
  int numIter = 0;
  bool finish = false;
  BlockT *curBlk;
  bool makeProgress = false;
  // Number of blocks still to be reduced; the progress measure.
  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
                                        orderedBlks.end());

  do {
    ++numIter;
    if (DEBUGME) {
      errs() << "numIter = " << numIter
             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
    }

    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
      iterBlk = orderedBlks.begin();
    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
      iterBlkEnd = orderedBlks.end();

    // State for the SCC currently being processed; orderedBlks groups the
    // blocks of each SCC contiguously (see orderBlocks()).
    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
      sccBeginIter = iterBlk;
    BlockT *sccBeginBlk = NULL;
    int sccNumBlk = 0;  // The number of active blocks, init to a
                        // maximum possible number.
    int sccNumIter;     // Number of iteration in this SCC.

    while (iterBlk != iterBlkEnd) {
      curBlk = *iterBlk;

      // Entering a new SCC: remember where it starts so we can re-iterate.
      if (sccBeginBlk == NULL) {
        sccBeginIter = iterBlk;
        sccBeginBlk = curBlk;
        sccNumIter = 0;
        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
        if (DEBUGME) {
          errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
          errs() << "\n";
        }
      }

      if (!isRetiredBlock(curBlk)) {
        patternMatch(curBlk);
      }

      ++iterBlk;

      bool contNextScc = true;
      if (iterBlk == iterBlkEnd
          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
        // Just finish one scc.
        ++sccNumIter;
        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
          // No reduction in this SCC this pass: give up on it and move on.
          if (DEBUGME) {
            errs() << "Can't reduce SCC " << getSCCNum(curBlk)
                   << ", sccNumIter = " << sccNumIter;
            errs() << "doesn't make any progress\n";
          }
          contNextScc = true;
        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
          // The SCC shrank but is not fully reduced: re-run it from the top.
          sccNumBlk = sccRemainedNumBlk;
          iterBlk = sccBeginIter;
          contNextScc = false;
          if (DEBUGME) {
            errs() << "repeat processing SCC" << getSCCNum(curBlk)
                   << "sccNumIter = " << sccNumIter << "\n";
            func.viewCFG();
            //func.viewCFGOnly();
          }
        } else {
          // Finish the current scc.
          contNextScc = true;
        }
      } else {
        // Continue on next component in the current scc.
        contNextScc = false;
      }

      if (contNextScc) {
        sccBeginBlk = NULL;
      }
    } //while, "one iteration" over the function.

    // Fully structurized iff the entry block has no successors left.
    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
    if (entryBlk->succ_size() == 0) {
      finish = true;
      if (DEBUGME) {
        errs() << "Reduce to one block\n";
      }
    } else {
      int newnumRemainedBlk
        = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
      // consider cloned blocks ??
      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
        makeProgress = true;
        numRemainedBlk = newnumRemainedBlk;
      } else {
        makeProgress = false;
        if (DEBUGME) {
          errs() << "No progress\n";
        }
      }
    }
  } while (!finish && makeProgress);

  // Misc wrap up to maintain the consistency of the Function representation.
  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));

  // Detach retired Block, release memory.
  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    if ((*iterMap).second && (*iterMap).second->isRetired) {
      assert(((*iterMap).first)->getNumber() != -1);
      if (DEBUGME) {
        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
      }
      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
    }
    delete (*iterMap).second;
  }
  blockInfoMap.clear();

  // clear loopLandInfoMap
  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    delete (*iterMap).second;
  }
  loopLandInfoMap.clear();

  if (DEBUGME) {
    func.viewCFG();
    //func.dump();
  }

  if (!finish) {
    assert(!"IRREDUCIBL_CF");
  }

  return true;
} //CFGStructurizer::run
703 | |||
704 | /// Print the ordered Blocks. | ||
705 | /// | ||
706 | template<class PassT> | ||
707 | void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { | ||
708 | size_t i = 0; | ||
709 | for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator | ||
710 | iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); | ||
711 | iterBlk != iterBlkEnd; | ||
712 | ++iterBlk, ++i) { | ||
713 | os << "BB" << (*iterBlk)->getNumber(); | ||
714 | os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; | ||
715 | if (i != 0 && i % 10 == 0) { | ||
716 | os << "\n"; | ||
717 | } else { | ||
718 | os << " "; | ||
719 | } | ||
720 | } | ||
721 | } //printOrderedBlocks | ||
722 | |||
723 | /// Compute the reversed DFS post order of Blocks | ||
724 | /// | ||
725 | template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { | ||
726 | int sccNum = 0; | ||
727 | BlockT *bb; | ||
728 | for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), | ||
729 | sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { | ||
730 | std::vector<BlockT *> &sccNext = *sccIter; | ||
731 | for (typename std::vector<BlockT *>::const_iterator | ||
732 | blockIter = sccNext.begin(), blockEnd = sccNext.end(); | ||
733 | blockIter != blockEnd; ++blockIter) { | ||
734 | bb = *blockIter; | ||
735 | orderedBlks.push_back(bb); | ||
736 | recordSccnum(bb, sccNum); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | //walk through all the block in func to check for unreachable | ||
741 | for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), | ||
742 | blockEnd1 = FuncGTraits::nodes_end(funcRep); | ||
743 | blockIter1 != blockEnd1; ++blockIter1) { | ||
744 | BlockT *bb = &(*blockIter1); | ||
745 | sccNum = getSCCNum(bb); | ||
746 | if (sccNum == INVALIDSCCNUM) { | ||
747 | errs() << "unreachable block BB" << bb->getNumber() << "\n"; | ||
748 | } | ||
749 | } //end of for | ||
750 | } //orderBlocks | ||
751 | |||
752 | template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { | ||
753 | int numMatch = 0; | ||
754 | int curMatch; | ||
755 | |||
756 | if (DEBUGME) { | ||
757 | errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; | ||
758 | } | ||
759 | |||
760 | while ((curMatch = patternMatchGroup(curBlk)) > 0) { | ||
761 | numMatch += curMatch; | ||
762 | } | ||
763 | |||
764 | if (DEBUGME) { | ||
765 | errs() << "End patternMatch BB" << curBlk->getNumber() | ||
766 | << ", numMatch = " << numMatch << "\n"; | ||
767 | } | ||
768 | |||
769 | return numMatch; | ||
770 | } //patternMatch | ||
771 | |||
772 | template<class PassT> | ||
773 | int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { | ||
774 | int numMatch = 0; | ||
775 | numMatch += serialPatternMatch(curBlk); | ||
776 | numMatch += ifPatternMatch(curBlk); | ||
777 | //numMatch += switchPatternMatch(curBlk); | ||
778 | numMatch += loopendPatternMatch(curBlk); | ||
779 | numMatch += loopPatternMatch(curBlk); | ||
780 | return numMatch; | ||
781 | }//patternMatchGroup | ||
782 | |||
783 | template<class PassT> | ||
784 | int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { | ||
785 | if (curBlk->succ_size() != 1) { | ||
786 | return 0; | ||
787 | } | ||
788 | |||
789 | BlockT *childBlk = *curBlk->succ_begin(); | ||
790 | if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { | ||
791 | return 0; | ||
792 | } | ||
793 | |||
794 | mergeSerialBlock(curBlk, childBlk); | ||
795 | ++numSerialPatternMatch; | ||
796 | return 1; | ||
797 | } //serialPatternMatch | ||
798 | |||
// If pattern: curBlk ends in a conditional branch with two successors
// (trueBlk/falseBlk).  Identify the landing (join) block for the region,
// cloning multi-predecessor arms so the region becomes single-entry, then
// collapse it with mergeIfthenelseBlock.  Returns the number of matches
// plus the number of blocks cloned, or 0 if the shape doesn't fit.
template<class PassT>
int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
  //two edges
  if (curBlk->succ_size() != 2) {
    return 0;
  }

  // A back edge means this is a loop branch, not an if.
  if (hasBackEdge(curBlk)) {
    return 0;
  }

  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
  if (branchInstr == NULL) {
    return 0;
  }

  assert(CFGTraits::isCondBranch(branchInstr));

  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
  BlockT *landBlk;
  int cloned = 0;

  // Determine the landing block.  An arm set to NULL below means that
  // side of the if is empty (the branch goes straight to the join).
  // TODO: Simplify
  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
    && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
    // Diamond: both arms rejoin at the same block.
    landBlk = *trueBlk->succ_begin();
  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
    // Both arms terminate: no join block.
    landBlk = NULL;
  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
    // Triangle: true arm falls through into the false target.
    landBlk = falseBlk;
    falseBlk = NULL;
  } else if (falseBlk->succ_size() == 1
             && *falseBlk->succ_begin() == trueBlk) {
    // Mirror triangle.
    landBlk = trueBlk;
    trueBlk = NULL;
  } else if (falseBlk->succ_size() == 1
             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
    // trueBlk is a detached continue/break of the same loop.
    landBlk = *falseBlk->succ_begin();
  } else if (trueBlk->succ_size() == 1
             && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
    landBlk = *trueBlk->succ_begin();
  } else {
    // No recognizable join: try the jump-into-if transformation instead.
    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (landBlk != NULL &&
      ((trueBlk && trueBlk->pred_size() > 1)
      || (falseBlk && falseBlk->pred_size() > 1))) {
    cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
  }

  // Arms with side entries get a private clone for curBlk.
  if (trueBlk && trueBlk->pred_size() > 1) {
    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
    ++cloned;
  }

  if (falseBlk && falseBlk->pred_size() > 1) {
    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
    ++cloned;
  }

  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);

  ++numIfPatternMatch;

  numClonedBlock += cloned;

  return 1 + cloned;
} //ifPatternMatch
872 | |||
// Switch structurization is not implemented; always reports zero matches.
// (The call site in patternMatchGroup is commented out accordingly.)
template<class PassT>
int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
  return 0;
} //switchPatternMatch
877 | |||
878 | template<class PassT> | ||
879 | int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { | ||
880 | LoopT *loopRep = loopInfo->getLoopFor(curBlk); | ||
881 | typename std::vector<LoopT *> nestedLoops; | ||
882 | while (loopRep) { | ||
883 | nestedLoops.push_back(loopRep); | ||
884 | loopRep = loopRep->getParentLoop(); | ||
885 | } | ||
886 | |||
887 | if (nestedLoops.size() == 0) { | ||
888 | return 0; | ||
889 | } | ||
890 | |||
891 | // Process nested loop outside->inside, so "continue" to a outside loop won't | ||
892 | // be mistaken as "break" of the current loop. | ||
893 | int num = 0; | ||
894 | for (typename std::vector<LoopT *>::reverse_iterator | ||
895 | iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); | ||
896 | iter != iterEnd; ++iter) { | ||
897 | loopRep = *iter; | ||
898 | |||
899 | if (getLoopLandBlock(loopRep) != NULL) { | ||
900 | continue; | ||
901 | } | ||
902 | |||
903 | BlockT *loopHeader = loopRep->getHeader(); | ||
904 | |||
905 | int numBreak = loopbreakPatternMatch(loopRep, loopHeader); | ||
906 | |||
907 | if (numBreak == -1) { | ||
908 | break; | ||
909 | } | ||
910 | |||
911 | int numCont = loopcontPatternMatch(loopRep, loopHeader); | ||
912 | num += numBreak + numCont; | ||
913 | } | ||
914 | |||
915 | return num; | ||
916 | } //loopendPatternMatch | ||
917 | |||
918 | template<class PassT> | ||
919 | int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { | ||
920 | if (curBlk->succ_size() != 0) { | ||
921 | return 0; | ||
922 | } | ||
923 | |||
924 | int numLoop = 0; | ||
925 | LoopT *loopRep = loopInfo->getLoopFor(curBlk); | ||
926 | while (loopRep && loopRep->getHeader() == curBlk) { | ||
927 | LoopLandInfo *loopLand = getLoopLandInfo(loopRep); | ||
928 | if (loopLand) { | ||
929 | BlockT *landBlk = loopLand->landBlk; | ||
930 | assert(landBlk); | ||
931 | if (!isRetiredBlock(landBlk)) { | ||
932 | mergeLooplandBlock(curBlk, loopLand); | ||
933 | ++numLoop; | ||
934 | } | ||
935 | } | ||
936 | loopRep = loopRep->getParentLoop(); | ||
937 | } | ||
938 | |||
939 | numLoopPatternMatch += numLoop; | ||
940 | |||
941 | return numLoop; | ||
942 | } //loopPatternMatch | ||
943 | |||
// Structurize all "break" exits of loopRep (with header loopHeader):
// find or synthesize a single exit landing block, clone away side entries
// on the exit paths, record the landing block, and fold each break into
// its exiting block via handleLoopbreak.  Returns the number of breaks
// handled (plus serial merges and clones), or -1 if the loop's exits
// cannot be reduced to a single landing block.
template<class PassT>
int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
                                                  BlockT *loopHeader) {
  BlockTSmallerVector exitingBlks;
  loopRep->getExitingBlocks(exitingBlks);

  if (DEBUGME) {
    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
  }

  // Infinite loop (no exits): just record an empty landing block.
  if (exitingBlks.size() == 0) {
    setLoopLandBlock(loopRep);
    return 0;
  }

  // Compute the corresponding exitBlks and exit block set.
  BlockTSmallerVector exitBlks;
  std::set<BlockT *> exitBlkSet;
  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
    BlockT *exitingBlk = *iter;
    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
    exitBlks.push_back(exitBlk);
    exitBlkSet.insert(exitBlk);  //non-duplicate insert
  }

  assert(exitBlkSet.size() > 0);
  assert(exitBlks.size() == exitingBlks.size());

  if (DEBUGME) {
    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
  }

  // Find exitLandBlk.
  BlockT *exitLandBlk = NULL;
  int numCloned = 0;
  int numSerial = 0;

  if (exitBlkSet.size() == 1)
  {
    // Single exit block: it is the landing block.
    exitLandBlk = *exitBlkSet.begin();
  } else {
    // Multiple exit blocks: try their nearest common post-dominator.
    exitLandBlk = findNearestCommonPostDom(exitBlkSet);

    if (exitLandBlk == NULL) {
      return -1;
    }

    // Classify each exit block: does it reach the candidate landing block
    // along a single path?  All exits must agree (all in-path or all not)
    // for the reduction to proceed.
    bool allInPath = true;
    bool allNotInPath = true;
    for (typename std::set<BlockT*>::const_iterator
         iter = exitBlkSet.begin(),
         iterEnd = exitBlkSet.end();
         iter != iterEnd; ++iter) {
      BlockT *exitBlk = *iter;

      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
      if (DEBUGME) {
        errs() << "BB" << exitBlk->getNumber()
               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
               << pathKind << "\n";
      }

      allInPath = allInPath && (pathKind == SinglePath_InPath);
      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);

      if (!allInPath && !allNotInPath) {
        if (DEBUGME) {
          errs() << "singlePath check fail\n";
        }
        return -1;
      }
    } // check all exit blocks

    if (allNotInPath) {
#if 1

      // No exit reaches the post-dominator directly; either relocate the
      // parent loop's continue block or synthesize an end-branch block.
      // TODO: Simplify, maybe separate function?
      //funcRep->viewCFG();
      LoopT *parentLoopRep = loopRep->getParentLoop();
      BlockT *parentLoopHeader = NULL;
      if (parentLoopRep)
        parentLoopHeader = parentLoopRep->getHeader();

      if (exitLandBlk == parentLoopHeader &&
          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
                                               loopRep,
                                               exitBlkSet,
                                               exitLandBlk)) != NULL) {
        if (DEBUGME) {
          errs() << "relocateLoopcontBlock success\n";
        }
      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
                                                      exitingBlks,
                                                      exitBlks)) != NULL) {
        if (DEBUGME) {
          errs() << "insertEndbranchBlock success\n";
        }
      } else {
        if (DEBUGME) {
          errs() << "loop exit fail\n";
        }
        return -1;
      }
#else
      return -1;
#endif
    }

    // Handle side entry to exit path.
    // Recompute exitBlks/exitBlkSet with side entries cloned away.
    exitBlks.clear();
    exitBlkSet.clear();
    for (typename BlockTSmallerVector::iterator iterExiting =
         exitingBlks.begin(),
         iterExitingEnd = exitingBlks.end();
         iterExiting != iterExitingEnd; ++iterExiting) {
      BlockT *exitingBlk = *iterExiting;
      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
      BlockT *newExitBlk = exitBlk;

      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
        ++numCloned;
      }

      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);

      exitBlks.push_back(newExitBlk);
      exitBlkSet.insert(newExitBlk);
    }

    // Collapse straight-line chains on the exit paths.
    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
         iterExitEnd = exitBlks.end();
         iterExit != iterExitEnd; ++iterExit) {
      BlockT *exitBlk = *iterExit;
      numSerial += serialPatternMatch(exitBlk);
    }

    // Final shape check: every exit block must either be the landing block
    // itself or feed it through a single edge.
    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
         iterExitEnd = exitBlks.end();
         iterExit != iterExitEnd; ++iterExit) {
      BlockT *exitBlk = *iterExit;
      if (exitBlk->pred_size() > 1) {
        if (exitBlk != exitLandBlk) {
          return -1;
        }
      } else {
        if (exitBlk != exitLandBlk &&
            (exitBlk->succ_size() != 1 ||
            *exitBlk->succ_begin() != exitLandBlk)) {
          return -1;
        }
      }
    }
  } // else

  // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);

  // Fold break into the breaking block. Leverage across level breaks.
  assert(exitingBlks.size() == exitBlks.size());
  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
    BlockT *exitBlk = *iterExit;
    BlockT *exitingBlk = *iterExiting;
    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
  }

  int numBreak = static_cast<int>(exitingBlks.size());
  numLoopbreakPatternMatch += numBreak;
  numClonedBlock += numCloned;
  return numBreak + numSerial + numCloned;
} //loopbreakPatternMatch
1120 | |||
1121 | template<class PassT> | ||
1122 | int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, | ||
1123 | BlockT *loopHeader) { | ||
1124 | int numCont = 0; | ||
1125 | SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; | ||
1126 | for (typename InvBlockGTraits::ChildIteratorType iter = | ||
1127 | InvBlockGTraits::child_begin(loopHeader), | ||
1128 | iterEnd = InvBlockGTraits::child_end(loopHeader); | ||
1129 | iter != iterEnd; ++iter) { | ||
1130 | BlockT *curBlk = *iter; | ||
1131 | if (loopRep->contains(curBlk)) { | ||
1132 | handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), | ||
1133 | loopHeader, loopRep); | ||
1134 | contBlk.push_back(curBlk); | ||
1135 | ++numCont; | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator | ||
1140 | iter = contBlk.begin(), iterEnd = contBlk.end(); | ||
1141 | iter != iterEnd; ++iter) { | ||
1142 | (*iter)->removeSuccessor(loopHeader); | ||
1143 | } | ||
1144 | |||
1145 | numLoopcontPatternMatch += numCont; | ||
1146 | |||
1147 | return numCont; | ||
1148 | } //loopcontPatternMatch | ||
1149 | |||
1150 | |||
1151 | template<class PassT> | ||
1152 | bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, | ||
1153 | BlockT *src2Blk) { | ||
1154 | // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the | ||
1155 | // same loop with LoopLandInfo without explicitly keeping track of | ||
1156 | // loopContBlks and loopBreakBlks, this is a method to get the information. | ||
1157 | // | ||
1158 | if (src1Blk->succ_size() == 0) { | ||
1159 | LoopT *loopRep = loopInfo->getLoopFor(src1Blk); | ||
1160 | if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { | ||
1161 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
1162 | if (theEntry != NULL) { | ||
1163 | if (DEBUGME) { | ||
1164 | errs() << "isLoopContBreakBlock yes src1 = BB" | ||
1165 | << src1Blk->getNumber() | ||
1166 | << " src2 = BB" << src2Blk->getNumber() << "\n"; | ||
1167 | } | ||
1168 | return true; | ||
1169 | } | ||
1170 | } | ||
1171 | } | ||
1172 | return false; | ||
1173 | } //isSameloopDetachedContbreak | ||
1174 | |||
1175 | template<class PassT> | ||
1176 | int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, | ||
1177 | BlockT *trueBlk, | ||
1178 | BlockT *falseBlk) { | ||
1179 | int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); | ||
1180 | if (num == 0) { | ||
1181 | if (DEBUGME) { | ||
1182 | errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; | ||
1183 | } | ||
1184 | num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); | ||
1185 | } | ||
1186 | return num; | ||
1187 | } | ||
1188 | |||
// Implementation of the jump-into-if transformation: walk single-successor
// blocks down from trueBlk looking for a block that falseBlk reaches along
// a single path; if found, clone the side entries so the region becomes a
// proper if, then re-run the serial/if matchers on it.  Returns the number
// of matches performed (0 if no candidate join block was found).
template<class PassT>
int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
                                                BlockT *trueBlk,
                                                BlockT *falseBlk) {
  int num = 0;
  BlockT *downBlk;

  //trueBlk could be the common post dominator
  downBlk = trueBlk;

  if (DEBUGME) {
    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
           << " true = BB" << trueBlk->getNumber()
           << ", numSucc=" << trueBlk->succ_size()
           << " false = BB" << falseBlk->getNumber() << "\n";
  }

  while (downBlk) {
    if (DEBUGME) {
      errs() << "check down = BB" << downBlk->getNumber();
    }

    if (//postDomTree->dominates(downBlk, falseBlk) &&
        singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
      if (DEBUGME) {
        errs() << " working\n";
      }

      // Found the join: detach side entries on both arms by cloning.
      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);

      numClonedBlock += num;
      // Re-reduce the now-well-formed region.
      num += serialPatternMatch(*headBlk->succ_begin());
      num += serialPatternMatch(*(++headBlk->succ_begin()));
      num += ifPatternMatch(headBlk);
      assert(num > 0); //

      break;
    }
    if (DEBUGME) {
      errs() << " not working\n";
    }
    // Descend along the unique successor chain; stop at a branch or exit.
    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
  } // walk down the postDomTree

  return num;
} //handleJumpintoIf
1236 | |||
1237 | template<class PassT> | ||
1238 | void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, | ||
1239 | BlockT *trueBlk, | ||
1240 | BlockT *falseBlk, | ||
1241 | BlockT *landBlk, | ||
1242 | bool detail) { | ||
1243 | errs() << "head = BB" << headBlk->getNumber() | ||
1244 | << " size = " << headBlk->size(); | ||
1245 | if (detail) { | ||
1246 | errs() << "\n"; | ||
1247 | headBlk->print(errs()); | ||
1248 | errs() << "\n"; | ||
1249 | } | ||
1250 | |||
1251 | if (trueBlk) { | ||
1252 | errs() << ", true = BB" << trueBlk->getNumber() << " size = " | ||
1253 | << trueBlk->size() << " numPred = " << trueBlk->pred_size(); | ||
1254 | if (detail) { | ||
1255 | errs() << "\n"; | ||
1256 | trueBlk->print(errs()); | ||
1257 | errs() << "\n"; | ||
1258 | } | ||
1259 | } | ||
1260 | if (falseBlk) { | ||
1261 | errs() << ", false = BB" << falseBlk->getNumber() << " size = " | ||
1262 | << falseBlk->size() << " numPred = " << falseBlk->pred_size(); | ||
1263 | if (detail) { | ||
1264 | errs() << "\n"; | ||
1265 | falseBlk->print(errs()); | ||
1266 | errs() << "\n"; | ||
1267 | } | ||
1268 | } | ||
1269 | if (landBlk) { | ||
1270 | errs() << ", land = BB" << landBlk->getNumber() << " size = " | ||
1271 | << landBlk->size() << " numPred = " << landBlk->pred_size(); | ||
1272 | if (detail) { | ||
1273 | errs() << "\n"; | ||
1274 | landBlk->print(errs()); | ||
1275 | errs() << "\n"; | ||
1276 | } | ||
1277 | } | ||
1278 | |||
1279 | errs() << "\n"; | ||
1280 | } //showImproveSimpleJumpintoIf | ||
1281 | |||
template<class PassT>
// Rewrite the pattern "headBlk => if () {trueBlk} else {falseBlk} => landBlk"
// when trueBlk/falseBlk have side entries, by migrating their instructions
// into landBlk guarded by a new flag register (initReg):
//   headBlk sets initReg on each arm, landBlk re-dispatches on initReg.
// *plandBlk is updated in place (a landing block is created when NULL).
// Returns the number of basic blocks newly created (0 or 1, or 0 if nothing
// needed migrating).
int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
                                                    BlockT *trueBlk,
                                                    BlockT *falseBlk,
                                                    BlockT **plandBlk) {
  bool migrateTrue = false;
  bool migrateFalse = false;

  BlockT *landBlk = *plandBlk;

  // Each arm must be straight-line (at most one successor).
  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
         && (falseBlk == NULL || falseBlk->succ_size() <= 1));

  if (trueBlk == falseBlk) {
    return 0;
  }

#if 0
  if (DEBUGME) {
    errs() << "improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
  }
#endif

  // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
  // May consider the # landBlk->pred_size() as it represents the number of
  // assignment initReg = .. needed to insert.
  migrateTrue = needMigrateBlock(trueBlk);
  migrateFalse = needMigrateBlock(falseBlk);

  if (!migrateTrue && !migrateFalse) {
    return 0;
  }

  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
  // have more than one predecessors.  without doing this, its predecessor
  // rather than headBlk will have undefined value in initReg.
  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
    migrateTrue = true;
  }
  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
    migrateFalse = true;
  }

  if (DEBUGME) {
    errs() << "before improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
  }

  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //      => org landBlk
  //      if landBlk->pred_size() > 2, put the about if-else inside
  //      if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk

  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  unsigned initReg =
    funcRep->getRegInfo().createVirtualRegister(I32RC);
  // When only one arm migrates, the head assigns the *other* arm's value so
  // falling through the non-migrated path leaves initReg well-defined.
  if (!migrateTrue || !migrateFalse) {
    int initVal = migrateTrue ? 0 : 1;
    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
  }

  int numNewBlk = 0;

  // No landing block yet: create one and wire each arm (or headBlk itself
  // when an arm is absent) to it.
  if (landBlk == NULL) {
    landBlk = funcRep->CreateMachineBasicBlock();
    funcRep->push_back(landBlk);  //insert to function

    if (trueBlk) {
      trueBlk->addSuccessor(landBlk);
    } else {
      headBlk->addSuccessor(landBlk);
    }

    if (falseBlk) {
      falseBlk->addSuccessor(landBlk);
    } else {
      headBlk->addSuccessor(landBlk);
    }

    numNewBlk ++;
  }

  // More than the two if-arms enter landBlk: the dispatch must be guarded by
  // an extra "initReg != 2" test (2 is the sentinel for "other predecessor").
  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);

  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  typename BlockT::iterator insertPos =
    CFGTraits::getInstrPos
    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));

  if (landBlkHasOtherPred) {
    unsigned immReg =
      funcRep->getRegInfo().createVirtualRegister(I32RC);
    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
    unsigned cmpResReg =
      funcRep->getRegInfo().createVirtualRegister(I32RC);

    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
                                        initReg, immReg);
    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
                                      AMDGPU::IF_LOGICALZ_i32, passRep,
                                      cmpResReg, DebugLoc());
  }

  // Dispatch on initReg: non-zero selects the (migrated) true path.
  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
                                    passRep, initReg, DebugLoc());

  if (migrateTrue) {
    migrateInstruction(trueBlk, landBlk, insertPos);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 1).
    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
  }
  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);

  if (migrateFalse) {
    migrateInstruction(falseBlk, landBlk, insertPos);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 0)
    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
  }
  //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);

  if (landBlkHasOtherPred) {
    // add endif
    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);

    // put initReg = 2 to other predecessors of landBlk
    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
         ++predIter) {
      BlockT *curBlk = *predIter;
      if (curBlk != trueBlk && curBlk != falseBlk) {
        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
      }
    } //for
  }
  if (DEBUGME) {
    errs() << "result from improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
  }

  // update landBlk
  *plandBlk = landBlk;

  return numNewBlk;
} //improveSimpleJumpintoIf
1439 | |||
template<class PassT>
// Structure a break out of exitingLoop into exitLoop's landing block.  When
// the break crosses loop levels a flag register is allocated: it is
// registered as a break-init register on exitLoop and as a break-on register
// on every intermediate loop from exitingLoop up to (but excluding) exitLoop,
// so the break propagates outward level by level.  The actual block rewrite
// is delegated to mergeLoopbreakBlock.
void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
                                             LoopT *exitingLoop,
                                             BlockT *exitBlk,
                                             LoopT *exitLoop,
                                             BlockT *landBlk) {
  if (DEBUGME) {
    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
  }
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);

  // INVALIDREGNUM means "same-level break, no flag register needed".
  RegiT initReg = INVALIDREGNUM;
  if (exitingLoop != exitLoop) {
    initReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));
    assert(initReg != INVALIDREGNUM);
    addLoopBreakInitReg(exitLoop, initReg);
    // Mark every loop between exitingLoop and exitLoop to break on the flag.
    while (exitingLoop != exitLoop && exitingLoop) {
      addLoopBreakOnReg(exitingLoop, initReg);
      exitingLoop = exitingLoop->getParentLoop();
    }
    assert(exitingLoop == exitLoop);
  }

  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);

} //handleLoopbreak
1468 | |||
template<class PassT>
// Structure a continue from contingBlk to the header of contLoop.  When the
// continue crosses loop levels a flag register is allocated: the inner loops
// between contingLoop and contLoop *break* on the flag (to unwind out of
// them), and the loop directly inside contLoop continues on it.  The actual
// block rewrite is delegated to settleLoopcontBlock.
void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
                                                 LoopT *contingLoop,
                                                 BlockT *contBlk,
                                                 LoopT *contLoop) {
  if (DEBUGME) {
    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
           << " header = BB" << contBlk->getNumber() << "\n";

    errs() << "Trying to continue loop-depth = "
           << getLoopDepth(contLoop)
           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
  }

  RegiT initReg = INVALIDREGNUM;
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (contingLoop != contLoop) {
    initReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));
    assert(initReg != INVALIDREGNUM);
    addLoopContInitReg(contLoop, initReg);
    // Intermediate loops must be *exited* (break), not continued, so the
    // control reaches the loop nested directly inside contLoop.
    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
      contingLoop = contingLoop->getParentLoop();
    }
    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
    addLoopContOnReg(contingLoop, initReg);
  }

  settleLoopcontBlock(contingBlk, contBlk, initReg);
  //contingBlk->removeSuccessor(loopHeader);
} //handleLoopcontBlock
1501 | |||
template<class PassT>
// Fold srcBlk into dstBlk as straight-line code: move all of srcBlk's
// instructions (skipping leading debug instructions) to the end of dstBlk,
// transfer srcBlk's successor list to dstBlk, then retire srcBlk.
void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
  if (DEBUGME) {
    errs() << "serialPattern BB" << dstBlk->getNumber()
           << " <= BB" << srcBlk->getNumber() << "\n";
  }
  //removeUnconditionalBranch(dstBlk);
  dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());

  // Re-wire the CFG: dstBlk inherits srcBlk's successors, srcBlk loses all.
  dstBlk->removeSuccessor(srcBlk);
  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);

  removeSuccessor(srcBlk);
  retireBlock(dstBlk, srcBlk);
} //mergeSerialBlock
1517 | |||
template<class PassT>
// Collapse a matched if-then-else region into curBlk as structured control
// flow: the conditional branch is replaced by IF(cond) / ELSE / ENDIF pseudo
// instructions with the (optional) trueBlk and falseBlk bodies spliced in
// between.  trueBlk/falseBlk are retired; landBlk (if any) becomes curBlk's
// fall-through successor.
void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
                                                  BlockT *curBlk,
                                                  BlockT *trueBlk,
                                                  BlockT *falseBlk,
                                                  BlockT *landBlk) {
  if (DEBUGME) {
    errs() << "ifPattern BB" << curBlk->getNumber();
    errs() << "{ ";
    if (trueBlk) {
      errs() << "BB" << trueBlk->getNumber();
    }
    errs() << " } else ";
    errs() << "{ ";
    if (falseBlk) {
      errs() << "BB" << falseBlk->getNumber();
    }
    errs() << " }\n ";
    errs() << "landBlock: ";
    if (landBlk == NULL) {
      errs() << "NULL";
    } else {
      errs() << "BB" << landBlk->getNumber();
    }
    errs() << "\n";
  }

  int oldOpcode = branchInstr->getOpcode();
  DebugLoc branchDL = branchInstr->getDebugLoc();

//    transform to
//    if cond
//       trueBlk
//    else
//       falseBlk
//    endif
//    landBlk

  typename BlockT::iterator branchInstrPos =
    CFGTraits::getInstrPos(curBlk, branchInstr);
  // The structured IF tests the same condition as the original branch
  // (branch-on-nonzero form of oldOpcode).
  CFGTraits::insertCondBranchBefore(branchInstrPos,
                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
                                    passRep,
                                    branchDL);

  if (trueBlk) {
    // Inline the then-body (minus debug prelude) and detach it from the CFG.
    curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
    curBlk->removeSuccessor(trueBlk);
    if (landBlk && trueBlk->succ_size()!=0) {
      trueBlk->removeSuccessor(landBlk);
    }
    retireBlock(curBlk, trueBlk);
  }
  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);

  if (falseBlk) {
    // Inline the else-body likewise.
    curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
                   falseBlk->end());
    curBlk->removeSuccessor(falseBlk);
    if (landBlk && falseBlk->succ_size() != 0) {
      falseBlk->removeSuccessor(landBlk);
    }
    retireBlock(curBlk, falseBlk);
  }
  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);

  //curBlk->remove(branchInstrPos);
  branchInstr->eraseFromParent();

  // Both arms were inlined, so curBlk now falls through to the landing block.
  if (landBlk && trueBlk && falseBlk) {
    curBlk->addSuccessor(landBlk);
  }

} //mergeIfthenelseBlock
1592 | |||
template<class PassT>
// Collapse a matched loop into its header dstBlk: wrap the body in
// WHILELOOP / ENDLOOP pseudo instructions, emit the break/continue flag
// initializations and post-loop flag tests recorded in loopLand, then inline
// the landing block and retire it.
void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
                                                LoopLandInfo *loopLand) {
  BlockT *landBlk = loopLand->landBlk;

  if (DEBUGME) {
    errs() << "loopPattern header = BB" << dstBlk->getNumber()
           << " land = BB" << landBlk->getNumber() << "\n";
  }

  // Loop contInitRegs are init at the beginning of the loop.
  for (typename std::set<RegiT>::const_iterator iter =
         loopLand->contInitRegs.begin(),
       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
  }

  /* we last inserted the DebugLoc in the
   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
   * search for the DebugLoc in that statement.
   * if not found, we have to insert the empty/default DebugLoc */
  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();

  CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
  // Loop breakInitRegs are init before entering the loop.
  for (typename std::set<RegiT>::const_iterator iter =
         loopLand->breakInitRegs.begin(),
       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
  }
  // Loop endbranchInitRegs are init before entering the loop.
  for (typename std::set<RegiT>::const_iterator iter =
         loopLand->endbranchInitRegs.begin(),
       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
  }

  /* we last inserted the DebugLoc in the continue statement in the current dstBlk
   * search for the DebugLoc in the continue statement.
   * if not found, we have to insert the empty/default DebugLoc */
  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();

  CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
  // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
  // loop.
  for (typename std::set<RegiT>::const_iterator iter =
         loopLand->breakOnRegs.begin(),
       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
                                   *iter);
  }

  // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
  // loop.
  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
                                   passRep, *iter);
  }

  // Inline the landing block after the loop and inherit its successors.
  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());

  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
  }

  removeSuccessor(landBlk);
  retireBlock(dstBlk, landBlk);
} //mergeLooplandBlock
1666 | |||
template<class PassT>
// Walk backwards from iterator I to the most recent PRED_X instruction and
// invert the sense of its predicate immediate (operand 2):
// IS_ZERO[_INT] <-> IS_NOT_ZERO[_INT].  Returns after flipping the first
// PRED_X found; if none precedes I the function silently does nothing.
// NOTE(review): `while (I--)` tests the iterator's implicit conversion to a
// pointer and skips the instruction I initially points at; it also assumes a
// PRED_X is found before walking past the block start -- confirm callers
// always satisfy this.
void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
{
  while (I--) {
    if (I->getOpcode() == AMDGPU::PRED_X) {
      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
      case OPCODE_IS_ZERO_INT:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
        return;
      case OPCODE_IS_NOT_ZERO_INT:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
        return;
      case OPCODE_IS_ZERO:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
        return;
      case OPCODE_IS_NOT_ZERO:
        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
        return;
      default:
        assert(0 && "PRED_X Opcode invalid!");
      }
    }
  }
}
1691 | |||
1692 | template<class PassT> | ||
1693 | void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, | ||
1694 | BlockT *exitBlk, | ||
1695 | BlockT *exitLandBlk, | ||
1696 | RegiT setReg) { | ||
1697 | if (DEBUGME) { | ||
1698 | errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() | ||
1699 | << " exit = BB" << exitBlk->getNumber() | ||
1700 | << " land = BB" << exitLandBlk->getNumber() << "\n"; | ||
1701 | } | ||
1702 | |||
1703 | InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); | ||
1704 | assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); | ||
1705 | |||
1706 | DebugLoc DL = branchInstr->getDebugLoc(); | ||
1707 | |||
1708 | BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); | ||
1709 | int oldOpcode = branchInstr->getOpcode(); | ||
1710 | |||
1711 | // transform exitingBlk to | ||
1712 | // if ( ) { | ||
1713 | // exitBlk (if exitBlk != exitLandBlk) | ||
1714 | // setReg = 1 | ||
1715 | // break | ||
1716 | // }endif | ||
1717 | // successor = {orgSuccessor(exitingBlk) - exitBlk} | ||
1718 | |||
1719 | typename BlockT::iterator branchInstrPos = | ||
1720 | CFGTraits::getInstrPos(exitingBlk, branchInstr); | ||
1721 | |||
1722 | if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { | ||
1723 | //break_logical | ||
1724 | |||
1725 | if (trueBranch != exitBlk) { | ||
1726 | reversePredicateSetter(branchInstrPos); | ||
1727 | } | ||
1728 | int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); | ||
1729 | CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); | ||
1730 | } else { | ||
1731 | if (trueBranch != exitBlk) { | ||
1732 | reversePredicateSetter(branchInstr); | ||
1733 | } | ||
1734 | int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); | ||
1735 | CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); | ||
1736 | if (exitBlk != exitLandBlk) { | ||
1737 | //splice is insert-before ... | ||
1738 | exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), | ||
1739 | exitBlk->end()); | ||
1740 | } | ||
1741 | if (setReg != INVALIDREGNUM) { | ||
1742 | CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); | ||
1743 | } | ||
1744 | CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); | ||
1745 | CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); | ||
1746 | } //if_logical | ||
1747 | |||
1748 | //now branchInst can be erase safely | ||
1749 | //exitingBlk->eraseFromParent(branchInstr); | ||
1750 | branchInstr->eraseFromParent(); | ||
1751 | |||
1752 | //now take care of successors, retire blocks | ||
1753 | exitingBlk->removeSuccessor(exitBlk); | ||
1754 | if (exitBlk != exitLandBlk) { | ||
1755 | //splice is insert-before ... | ||
1756 | exitBlk->removeSuccessor(exitLandBlk); | ||
1757 | retireBlock(exitingBlk, exitBlk); | ||
1758 | } | ||
1759 | |||
1760 | } //mergeLoopbreakBlock | ||
1761 | |||
template<class PassT>
// Rewrite contingBlk's branch-to-loop-header into structured continue control
// flow.  When the conditional branch is the last instruction and no flag
// register is needed, a single CONTINUE_LOGICAL(N)Z suffices; otherwise an
// IF .. {setReg = 1; BREAK | CONTINUE} .. ENDIF region is emitted.  If the
// branch was already erased by an earlier transform, only the flag
// assignment / CONTINUE is appended, reusing the block's last DebugLoc.
void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
                                                 BlockT *contBlk,
                                                 RegiT setReg) {
  if (DEBUGME) {
    errs() << "settleLoopcontBlock conting = BB"
           << contingBlk->getNumber()
           << ", cont = BB" << contBlk->getNumber() << "\n";
  }

  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
  if (branchInstr) {
    assert(CFGTraits::isCondBranch(branchInstr));
    typename BlockT::iterator branchInstrPos =
      CFGTraits::getInstrPos(contingBlk, branchInstr);
    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
    int oldOpcode = branchInstr->getOpcode();
    DebugLoc DL = branchInstr->getDebugLoc();

    //  transform contingBlk to
    //     if () {
    //          move instr after branchInstr
    //          continue
    //        or
    //          setReg = 1
    //          break
    //     }endif
    //     successor = {orgSuccessor(contingBlk) - loopHeader}

    // The compact CONTINUE_LOGICAL form is only valid when the branch is the
    // block's final instruction and no cross-level flag must be set.
    bool useContinueLogical =
      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);

    if (useContinueLogical == false) {
      // Pick the IF polarity so the "continue" body runs on the edge that
      // originally targeted the loop header (contBlk).
      int branchOpcode =
        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
                              : CFGTraits::getBranchZeroOpcode(oldOpcode);

      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);

      if (setReg != INVALIDREGNUM) {
        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
      } else {
        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
      }

      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
    } else {
      int branchOpcode =
        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
                              : CFGTraits::getContinueZeroOpcode(oldOpcode);

      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
    }

    //contingBlk->eraseFromParent(branchInstr);
    branchInstr->eraseFromParent();
  } else {
    /* if we've arrived here then we've already erased the branch instruction
     * travel back up the basic block to see the last reference of our debug location
     * we've just inserted that reference here so it should be representative */
    if (setReg != INVALIDREGNUM) {
      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
    } else {
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
    }
  } //else

} //settleLoopcontBlock
1837 | |||
// BBs in exitBlkSet are determined as in break-path for loopRep,
// before we can put code for BBs as inside loop-body for loopRep
// check whether those BBs are determined as cont-BB for parentLoopRep
// earlier.
// If so, generate a new BB newBlk
//    (1) set newBlk common successor of BBs in exitBlkSet
//    (2) change the continue-instr in BBs in exitBlkSet to break-instr
//    (3) generate continue-instr in newBlk
//
// Returns the new common continue block, or NULL if some exit path does not
// end in a block carrying a continue-instr (in which case nothing is changed).
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
                                              LoopT *loopRep,
                                              std::set<BlockT *> &exitBlkSet,
                                              BlockT *exitLandBlk) {
  std::set<BlockT *> endBlkSet;

//  BlockT *parentLoopHead = parentLoopRep->getHeader();


  // Collect, for every exit block, the last block on its single path toward
  // exitLandBlk; all of them must hold a continue-instr for the relocation
  // to be applicable.
  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
       iterEnd = exitBlkSet.end();
       iter != iterEnd; ++iter) {
    BlockT *exitBlk = *iter;
    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);

    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
      return NULL;

    endBlkSet.insert(endBlk);
  }

  // Create the single continue block all end blocks will funnel into.
  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
  funcRep->push_back(newBlk);  //insert to function
  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
  SHOWNEWBLK(newBlk, "New continue block: ");

  // Strip each end block's own continue-instr and route it to newBlk instead.
  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
       iterEnd = endBlkSet.end();
       iter != iterEnd; ++iter) {
    BlockT *endBlk = *iter;
    InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
    if (contInstr) {
      contInstr->eraseFromParent();
    }
    endBlk->addSuccessor(newBlk);
    if (DEBUGME) {
      errs() << "Add new continue Block to BB"
             << endBlk->getNumber() << " successors\n";
    }
  }

  return newBlk;
} //relocateLoopcontBlock
1892 | |||
1893 | |||
// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
// LoopLandBlock.  This BB branch on the loop endBranchInit register to the
// pathes corresponding to the loop exiting branches.
//
// Each exiting block i writes endBranchReg = i before leaving the loop; the
// chain of branch blocks built here then compares endBranchReg against
// 0, 1, ... to dispatch to the matching exit block.  Returns the first block
// of that dispatch chain (the new common landing block).

template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
                                              BlockTSmallerVector &exitingBlks,
                                              BlockTSmallerVector &exitBlks) {
  const AMDGPUInstrInfo *tii =
             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);

  RegiT endBranchReg = static_cast<int>
    (funcRep->getRegInfo().createVirtualRegister(I32RC));
  assert(endBranchReg >= 0);

  // reg = 0 before entering the loop
  addLoopEndbranchInitReg(loopRep, endBranchReg);

  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
  assert(numBlks >=2 && numBlks == exitBlks.size());

  BlockT *preExitingBlk = exitingBlks[0];
  BlockT *preExitBlk = exitBlks[0];
  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
  funcRep->push_back(preBranchBlk);  //insert to function
  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");

  BlockT *newLandBlk = preBranchBlk;

  // Redirect the first exiting edge into the head of the dispatch chain.
  CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
                                        newLandBlk);
  preExitingBlk->removeSuccessor(preExitBlk);
  preExitingBlk->addSuccessor(newLandBlk);

  //it is redundant to add reg = 0 to exitingBlks[0]

  // For 1..n th exiting path (the last iteration handles two pathes) create the
  // branch to the previous path and the current path.
  for (uint32_t i = 1; i < numBlks; ++i) {
    BlockT *curExitingBlk = exitingBlks[i];
    BlockT *curExitBlk = exitBlks[i];
    BlockT *curBranchBlk;

    // The last comparison needs no further branch block: its "else" target
    // is the final exit block itself.
    if (i == numBlks - 1) {
      curBranchBlk = curExitBlk;
    } else {
      curBranchBlk = funcRep->CreateMachineBasicBlock();
      funcRep->push_back(curBranchBlk);  //insert to function
      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
    }

    // Add reg = i to exitingBlks[i].
    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
                                       endBranchReg, i);

    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
    // (exitingBlks[i], newLandBlk).
    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
                                          newLandBlk);
    curExitingBlk->removeSuccessor(curExitBlk);
    curExitingBlk->addSuccessor(newLandBlk);

    // add to preBranchBlk the branch instruction:
    // if (endBranchReg == preVal)
    //    preExitBlk
    // else
    //    curBranchBlk
    //
    // preValReg = i - 1

    DebugLoc DL;
    RegiT preValReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));

    preBranchBlk->insert(preBranchBlk->begin(),
                         tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
                                             i - 1));

    // condResReg = (endBranchReg == preValReg)
    RegiT condResReg = static_cast<int>
      (funcRep->getRegInfo().createVirtualRegister(I32RC));
    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
      .addReg(endBranchReg).addReg(preValReg);

    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
      .addMBB(preExitBlk).addReg(condResReg);

    preBranchBlk->addSuccessor(preExitBlk);
    preBranchBlk->addSuccessor(curBranchBlk);

    // Update preExitingBlk, preExitBlk, preBranchBlk.
    preExitingBlk = curExitingBlk;
    preExitBlk = curExitBlk;
    preBranchBlk = curBranchBlk;

  } //end for 1 .. n blocks

  return newLandBlk;
} //addLoopEndbranchBlock
1995 | |||
1996 | template<class PassT> | ||
1997 | typename CFGStructurizer<PassT>::PathToKind | ||
1998 | CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, | ||
1999 | bool allowSideEntry) { | ||
2000 | assert(dstBlk); | ||
2001 | |||
2002 | if (srcBlk == dstBlk) { | ||
2003 | return SinglePath_InPath; | ||
2004 | } | ||
2005 | |||
2006 | while (srcBlk && srcBlk->succ_size() == 1) { | ||
2007 | srcBlk = *srcBlk->succ_begin(); | ||
2008 | if (srcBlk == dstBlk) { | ||
2009 | return SinglePath_InPath; | ||
2010 | } | ||
2011 | |||
2012 | if (!allowSideEntry && srcBlk->pred_size() > 1) { | ||
2013 | return Not_SinglePath; | ||
2014 | } | ||
2015 | } | ||
2016 | |||
2017 | if (srcBlk && srcBlk->succ_size()==0) { | ||
2018 | return SinglePath_NotInPath; | ||
2019 | } | ||
2020 | |||
2021 | return Not_SinglePath; | ||
2022 | } //singlePathTo | ||
2023 | |||
2024 | // If there is a single path from srcBlk to dstBlk, return the last block before | ||
2025 | // dstBlk If there is a single path from srcBlk->end without dstBlk, return the | ||
2026 | // last block in the path Otherwise, return NULL | ||
2027 | template<class PassT> | ||
2028 | typename CFGStructurizer<PassT>::BlockT * | ||
2029 | CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, | ||
2030 | bool allowSideEntry) { | ||
2031 | assert(dstBlk); | ||
2032 | |||
2033 | if (srcBlk == dstBlk) { | ||
2034 | return srcBlk; | ||
2035 | } | ||
2036 | |||
2037 | if (srcBlk->succ_size() == 0) { | ||
2038 | return srcBlk; | ||
2039 | } | ||
2040 | |||
2041 | while (srcBlk && srcBlk->succ_size() == 1) { | ||
2042 | BlockT *preBlk = srcBlk; | ||
2043 | |||
2044 | srcBlk = *srcBlk->succ_begin(); | ||
2045 | if (srcBlk == NULL) { | ||
2046 | return preBlk; | ||
2047 | } | ||
2048 | |||
2049 | if (!allowSideEntry && srcBlk->pred_size() > 1) { | ||
2050 | return NULL; | ||
2051 | } | ||
2052 | } | ||
2053 | |||
2054 | if (srcBlk && srcBlk->succ_size()==0) { | ||
2055 | return srcBlk; | ||
2056 | } | ||
2057 | |||
2058 | return NULL; | ||
2059 | |||
2060 | } //singlePathEnd | ||
2061 | |||
2062 | template<class PassT> | ||
2063 | int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, | ||
2064 | BlockT *dstBlk) { | ||
2065 | int cloned = 0; | ||
2066 | assert(preBlk->isSuccessor(srcBlk)); | ||
2067 | while (srcBlk && srcBlk != dstBlk) { | ||
2068 | assert(srcBlk->succ_size() == 1); | ||
2069 | if (srcBlk->pred_size() > 1) { | ||
2070 | srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); | ||
2071 | ++cloned; | ||
2072 | } | ||
2073 | |||
2074 | preBlk = srcBlk; | ||
2075 | srcBlk = *srcBlk->succ_begin(); | ||
2076 | } | ||
2077 | |||
2078 | return cloned; | ||
2079 | } //cloneOnSideEntryTo | ||
2080 | |||
2081 | template<class PassT> | ||
2082 | typename CFGStructurizer<PassT>::BlockT * | ||
2083 | CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, | ||
2084 | BlockT *predBlk) { | ||
2085 | assert(predBlk->isSuccessor(curBlk) && | ||
2086 | "succBlk is not a prececessor of curBlk"); | ||
2087 | |||
2088 | BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions | ||
2089 | CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); | ||
2090 | //srcBlk, oldBlk, newBlk | ||
2091 | |||
2092 | predBlk->removeSuccessor(curBlk); | ||
2093 | predBlk->addSuccessor(cloneBlk); | ||
2094 | |||
2095 | // add all successor to cloneBlk | ||
2096 | CFGTraits::cloneSuccessorList(cloneBlk, curBlk); | ||
2097 | |||
2098 | numClonedInstr += curBlk->size(); | ||
2099 | |||
2100 | if (DEBUGME) { | ||
2101 | errs() << "Cloned block: " << "BB" | ||
2102 | << curBlk->getNumber() << "size " << curBlk->size() << "\n"; | ||
2103 | } | ||
2104 | |||
2105 | SHOWNEWBLK(cloneBlk, "result of Cloned block: "); | ||
2106 | |||
2107 | return cloneBlk; | ||
2108 | } //cloneBlockForPredecessor | ||
2109 | |||
2110 | template<class PassT> | ||
2111 | typename CFGStructurizer<PassT>::BlockT * | ||
2112 | CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, | ||
2113 | BlockT *exitingBlk) { | ||
2114 | BlockT *exitBlk = NULL; | ||
2115 | |||
2116 | for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), | ||
2117 | iterSuccEnd = exitingBlk->succ_end(); | ||
2118 | iterSucc != iterSuccEnd; ++iterSucc) { | ||
2119 | BlockT *curBlk = *iterSucc; | ||
2120 | if (!loopRep->contains(curBlk)) { | ||
2121 | assert(exitBlk == NULL); | ||
2122 | exitBlk = curBlk; | ||
2123 | } | ||
2124 | } | ||
2125 | |||
2126 | assert(exitBlk != NULL); | ||
2127 | |||
2128 | return exitBlk; | ||
2129 | } //exitingBlock2ExitBlock | ||
2130 | |||
2131 | template<class PassT> | ||
2132 | void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, | ||
2133 | BlockT *dstBlk, | ||
2134 | InstrIterator insertPos) { | ||
2135 | InstrIterator spliceEnd; | ||
2136 | //look for the input branchinstr, not the AMDGPU branchinstr | ||
2137 | InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); | ||
2138 | if (branchInstr == NULL) { | ||
2139 | if (DEBUGME) { | ||
2140 | errs() << "migrateInstruction don't see branch instr\n" ; | ||
2141 | } | ||
2142 | spliceEnd = srcBlk->end(); | ||
2143 | } else { | ||
2144 | if (DEBUGME) { | ||
2145 | errs() << "migrateInstruction see branch instr\n" ; | ||
2146 | branchInstr->dump(); | ||
2147 | } | ||
2148 | spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); | ||
2149 | } | ||
2150 | if (DEBUGME) { | ||
2151 | errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() | ||
2152 | << "srcSize = " << srcBlk->size() << "\n"; | ||
2153 | } | ||
2154 | |||
2155 | //splice insert before insertPos | ||
2156 | dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); | ||
2157 | |||
2158 | if (DEBUGME) { | ||
2159 | errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() | ||
2160 | << "srcSize = " << srcBlk->size() << "\n"; | ||
2161 | } | ||
2162 | } //migrateInstruction | ||
2163 | |||
// normalizeInfiniteLoopExit change
// B1:
// uncond_br LoopHeader
//
// to
// B1:
// cond_br 1 LoopHeader dummyExit
// and return the newly added dummy exit block
//
// Returns NULL when the loop already has an exit (or has no latch/header),
// in which case nothing is modified.
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
  BlockT *loopHeader;
  BlockT *loopLatch;
  loopHeader = LoopRep->getHeader();
  loopLatch = LoopRep->getLoopLatch();
  BlockT *dummyExitBlk = NULL;
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (loopHeader!=NULL && loopLatch!=NULL) {
    // Only rewrite when the latch ends in an unconditional branch back to
    // the header, i.e. the loop truly has no exit.
    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
      dummyExitBlk = funcRep->CreateMachineBasicBlock();
      funcRep->push_back(dummyExitBlk);  //insert to function
      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");

      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";

      // Materialize constant 1 in a fresh vreg and branch on it, so the
      // back edge becomes formally conditional while behaving identically.
      typename BlockT::iterator insertPos =
        CFGTraits::getInstrPos(loopLatch, branchInstr);
      unsigned immReg =
        funcRep->getRegInfo().createVirtualRegister(I32RC);
      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
      InstrT *newInstr =
        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);

      SHOWNEWINSTR(newInstr);

      // Replace the old unconditional branch and wire the fall-through
      // (false) edge to the new dummy exit.
      branchInstr->eraseFromParent();
      loopLatch->addSuccessor(dummyExitBlk);
    }
  }

  return dummyExitBlk;
} //normalizeInfiniteLoopExit
2209 | |||
2210 | template<class PassT> | ||
2211 | void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { | ||
2212 | InstrT *branchInstr; | ||
2213 | |||
2214 | // I saw two unconditional branch in one basic block in example | ||
2215 | // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. | ||
2216 | while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) | ||
2217 | && CFGTraits::isUncondBranch(branchInstr)) { | ||
2218 | if (DEBUGME) { | ||
2219 | errs() << "Removing unconditional branch instruction" ; | ||
2220 | branchInstr->dump(); | ||
2221 | } | ||
2222 | branchInstr->eraseFromParent(); | ||
2223 | } | ||
2224 | } //removeUnconditionalBranch | ||
2225 | |||
2226 | template<class PassT> | ||
2227 | void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { | ||
2228 | if (srcBlk->succ_size() == 2) { | ||
2229 | BlockT *blk1 = *srcBlk->succ_begin(); | ||
2230 | BlockT *blk2 = *(++srcBlk->succ_begin()); | ||
2231 | |||
2232 | if (blk1 == blk2) { | ||
2233 | InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); | ||
2234 | assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); | ||
2235 | if (DEBUGME) { | ||
2236 | errs() << "Removing unneeded conditional branch instruction" ; | ||
2237 | branchInstr->dump(); | ||
2238 | } | ||
2239 | branchInstr->eraseFromParent(); | ||
2240 | SHOWNEWBLK(blk1, "Removing redundant successor"); | ||
2241 | srcBlk->removeSuccessor(blk1); | ||
2242 | } | ||
2243 | } | ||
2244 | } //removeRedundantConditionalBranch | ||
2245 | |||
2246 | template<class PassT> | ||
2247 | void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, | ||
2248 | DEFAULT_VEC_SLOTS> &retBlks) { | ||
2249 | BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); | ||
2250 | funcRep->push_back(dummyExitBlk); //insert to function | ||
2251 | CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); | ||
2252 | |||
2253 | for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = | ||
2254 | retBlks.begin(), | ||
2255 | iterEnd = retBlks.end(); iter != iterEnd; ++iter) { | ||
2256 | BlockT *curBlk = *iter; | ||
2257 | InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); | ||
2258 | if (curInstr) { | ||
2259 | curInstr->eraseFromParent(); | ||
2260 | } | ||
2261 | #if 0 | ||
2262 | if (curBlk->size()==0 && curBlk->pred_size() == 1) { | ||
2263 | if (DEBUGME) { | ||
2264 | errs() << "Replace empty block BB" << curBlk->getNumber() | ||
2265 | << " with dummyExitBlock\n"; | ||
2266 | } | ||
2267 | BlockT *predb = *curBlk->pred_begin(); | ||
2268 | predb->removeSuccessor(curBlk); | ||
2269 | curBlk = predb; | ||
2270 | } //handle empty curBlk | ||
2271 | #endif | ||
2272 | curBlk->addSuccessor(dummyExitBlk); | ||
2273 | if (DEBUGME) { | ||
2274 | errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() | ||
2275 | << " successors\n"; | ||
2276 | } | ||
2277 | } //for | ||
2278 | |||
2279 | SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); | ||
2280 | } //addDummyExitBlock | ||
2281 | |||
2282 | template<class PassT> | ||
2283 | void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { | ||
2284 | while (srcBlk->succ_size()) { | ||
2285 | srcBlk->removeSuccessor(*srcBlk->succ_begin()); | ||
2286 | } | ||
2287 | } | ||
2288 | |||
2289 | template<class PassT> | ||
2290 | void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { | ||
2291 | BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; | ||
2292 | |||
2293 | if (srcBlkInfo == NULL) { | ||
2294 | srcBlkInfo = new BlockInfo(); | ||
2295 | } | ||
2296 | |||
2297 | srcBlkInfo->sccNum = sccNum; | ||
2298 | } | ||
2299 | |||
2300 | template<class PassT> | ||
2301 | int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { | ||
2302 | BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; | ||
2303 | return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; | ||
2304 | } | ||
2305 | |||
2306 | template<class PassT> | ||
2307 | void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { | ||
2308 | if (DEBUGME) { | ||
2309 | errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; | ||
2310 | } | ||
2311 | |||
2312 | BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; | ||
2313 | |||
2314 | if (srcBlkInfo == NULL) { | ||
2315 | srcBlkInfo = new BlockInfo(); | ||
2316 | } | ||
2317 | |||
2318 | srcBlkInfo->isRetired = true; | ||
2319 | //int i = srcBlk->succ_size(); | ||
2320 | //int j = srcBlk->pred_size(); | ||
2321 | assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 | ||
2322 | && "can't retire block yet"); | ||
2323 | } | ||
2324 | |||
2325 | template<class PassT> | ||
2326 | bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { | ||
2327 | BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; | ||
2328 | return (srcBlkInfo && srcBlkInfo->isRetired); | ||
2329 | } | ||
2330 | |||
2331 | template<class PassT> | ||
2332 | bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { | ||
2333 | LoopT *loopRep = loopInfo->getLoopFor(curBlk); | ||
2334 | while (loopRep && loopRep->getHeader() == curBlk) { | ||
2335 | LoopLandInfo *loopLand = getLoopLandInfo(loopRep); | ||
2336 | |||
2337 | if(loopLand == NULL) | ||
2338 | return true; | ||
2339 | |||
2340 | BlockT *landBlk = loopLand->landBlk; | ||
2341 | assert(landBlk); | ||
2342 | if (!isRetiredBlock(landBlk)) { | ||
2343 | return true; | ||
2344 | } | ||
2345 | |||
2346 | loopRep = loopRep->getParentLoop(); | ||
2347 | } | ||
2348 | |||
2349 | return false; | ||
2350 | } //isActiveLoophead | ||
2351 | |||
2352 | template<class PassT> | ||
2353 | bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { | ||
2354 | const unsigned blockSizeThreshold = 30; | ||
2355 | const unsigned cloneInstrThreshold = 100; | ||
2356 | |||
2357 | bool multiplePreds = blk && (blk->pred_size() > 1); | ||
2358 | |||
2359 | if(!multiplePreds) | ||
2360 | return false; | ||
2361 | |||
2362 | unsigned blkSize = blk->size(); | ||
2363 | return ((blkSize > blockSizeThreshold) | ||
2364 | && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); | ||
2365 | } //needMigrateBlock | ||
2366 | |||
// Pick (or synthesize) the landing block for loopRep and record it.
// landBlk is reused directly unless it also has predecessors from outside
// the loop / exit-block set; in that case a fresh block is interposed so
// the recorded landing block is reached only from the loop's exit paths.
// Returns the block actually recorded.
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
                                            BlockTSmallerVector &exitBlks,
                                            std::set<BlockT *> &exitBlkSet) {
  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks

  // Collect landBlk's predecessors that belong to the loop or its exits.
  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
       predIterEnd = landBlk->pred_end();
       predIter != predIterEnd; ++predIter) {
    BlockT *curBlk = *predIter;
    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
      inpathBlks.push_back(curBlk);
    }
  } //for

  //if landBlk has predecessors that are not in the given loop,
  //create a new block
  BlockT *newLandBlk = landBlk;
  if (inpathBlks.size() != landBlk->pred_size()) {
    newLandBlk = funcRep->CreateMachineBasicBlock();
    funcRep->push_back(newLandBlk);  //insert to function
    newLandBlk->addSuccessor(landBlk);
    // Redirect every in-path predecessor from landBlk to the new block.
    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
         inpathBlks.begin(),
         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
      BlockT *curBlk = *iter;
      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
      //srcBlk, oldBlk, newBlk
      curBlk->removeSuccessor(landBlk);
      curBlk->addSuccessor(newLandBlk);
    }
    // Keep the recorded exit blocks consistent with the replacement.
    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
      if (exitBlks[i] == landBlk) {
        exitBlks[i] = newLandBlk;
      }
    }
    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
  }

  setLoopLandBlock(loopRep, newLandBlk);

  return newLandBlk;
} // recordLoopbreakLand
2411 | |||
2412 | template<class PassT> | ||
2413 | void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { | ||
2414 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2415 | |||
2416 | if (theEntry == NULL) { | ||
2417 | theEntry = new LoopLandInfo(); | ||
2418 | } | ||
2419 | assert(theEntry->landBlk == NULL); | ||
2420 | |||
2421 | if (blk == NULL) { | ||
2422 | blk = funcRep->CreateMachineBasicBlock(); | ||
2423 | funcRep->push_back(blk); //insert to function | ||
2424 | SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); | ||
2425 | } | ||
2426 | |||
2427 | theEntry->landBlk = blk; | ||
2428 | |||
2429 | if (DEBUGME) { | ||
2430 | errs() << "setLoopLandBlock loop-header = BB" | ||
2431 | << loopRep->getHeader()->getNumber() | ||
2432 | << " landing-block = BB" << blk->getNumber() << "\n"; | ||
2433 | } | ||
2434 | } // setLoopLandBlock | ||
2435 | |||
2436 | template<class PassT> | ||
2437 | void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { | ||
2438 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2439 | |||
2440 | if (theEntry == NULL) { | ||
2441 | theEntry = new LoopLandInfo(); | ||
2442 | } | ||
2443 | |||
2444 | theEntry->breakOnRegs.insert(regNum); | ||
2445 | |||
2446 | if (DEBUGME) { | ||
2447 | errs() << "addLoopBreakOnReg loop-header = BB" | ||
2448 | << loopRep->getHeader()->getNumber() | ||
2449 | << " regNum = " << regNum << "\n"; | ||
2450 | } | ||
2451 | } // addLoopBreakOnReg | ||
2452 | |||
2453 | template<class PassT> | ||
2454 | void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { | ||
2455 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2456 | |||
2457 | if (theEntry == NULL) { | ||
2458 | theEntry = new LoopLandInfo(); | ||
2459 | } | ||
2460 | theEntry->contOnRegs.insert(regNum); | ||
2461 | |||
2462 | if (DEBUGME) { | ||
2463 | errs() << "addLoopContOnReg loop-header = BB" | ||
2464 | << loopRep->getHeader()->getNumber() | ||
2465 | << " regNum = " << regNum << "\n"; | ||
2466 | } | ||
2467 | } // addLoopContOnReg | ||
2468 | |||
2469 | template<class PassT> | ||
2470 | void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { | ||
2471 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2472 | |||
2473 | if (theEntry == NULL) { | ||
2474 | theEntry = new LoopLandInfo(); | ||
2475 | } | ||
2476 | theEntry->breakInitRegs.insert(regNum); | ||
2477 | |||
2478 | if (DEBUGME) { | ||
2479 | errs() << "addLoopBreakInitReg loop-header = BB" | ||
2480 | << loopRep->getHeader()->getNumber() | ||
2481 | << " regNum = " << regNum << "\n"; | ||
2482 | } | ||
2483 | } // addLoopBreakInitReg | ||
2484 | |||
2485 | template<class PassT> | ||
2486 | void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { | ||
2487 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2488 | |||
2489 | if (theEntry == NULL) { | ||
2490 | theEntry = new LoopLandInfo(); | ||
2491 | } | ||
2492 | theEntry->contInitRegs.insert(regNum); | ||
2493 | |||
2494 | if (DEBUGME) { | ||
2495 | errs() << "addLoopContInitReg loop-header = BB" | ||
2496 | << loopRep->getHeader()->getNumber() | ||
2497 | << " regNum = " << regNum << "\n"; | ||
2498 | } | ||
2499 | } // addLoopContInitReg | ||
2500 | |||
2501 | template<class PassT> | ||
2502 | void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, | ||
2503 | RegiT regNum) { | ||
2504 | LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; | ||
2505 | |||
2506 | if (theEntry == NULL) { | ||
2507 | theEntry = new LoopLandInfo(); | ||
2508 | } | ||
2509 | theEntry->endbranchInitRegs.insert(regNum); | ||
2510 | |||
2511 | if (DEBUGME) | ||
2512 | { | ||
2513 | errs() << "addLoopEndbranchInitReg loop-header = BB" | ||
2514 | << loopRep->getHeader()->getNumber() | ||
2515 | << " regNum = " << regNum << "\n"; | ||
2516 | } | ||
2517 | } // addLoopEndbranchInitReg | ||
2518 | |||
template<class PassT>
typename CFGStructurizer<PassT>::LoopLandInfo *
CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
  // Returns the landing-block record for loopRep, or NULL if none exists.
  // NOTE(review): operator[] default-inserts a NULL entry for a loop that
  // was never recorded, so this read-only query grows the map — confirm
  // this is intentional (a find()-based lookup would avoid it).
  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];

  return theEntry;
} // getLoopLandInfo
2526 | |||
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
  // Returns the landing block recorded for loopRep, or NULL if none.
  // NOTE(review): as in getLoopLandInfo, operator[] default-inserts a NULL
  // entry for unseen loops — confirm the map growth is intentional.
  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];

  return theEntry ? theEntry->landBlk : NULL;
} // getLoopLandBlock
2534 | |||
2535 | |||
2536 | template<class PassT> | ||
2537 | bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { | ||
2538 | LoopT *loopRep = loopInfo->getLoopFor(curBlk); | ||
2539 | if (loopRep == NULL) | ||
2540 | return false; | ||
2541 | |||
2542 | BlockT *loopHeader = loopRep->getHeader(); | ||
2543 | |||
2544 | return curBlk->isSuccessor(loopHeader); | ||
2545 | |||
2546 | } //hasBackEdge | ||
2547 | |||
2548 | template<class PassT> | ||
2549 | unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { | ||
2550 | return loopRep ? loopRep->getLoopDepth() : 0; | ||
2551 | } //getLoopDepth | ||
2552 | |||
2553 | template<class PassT> | ||
2554 | int CFGStructurizer<PassT>::countActiveBlock | ||
2555 | (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, | ||
2556 | typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { | ||
2557 | int count = 0; | ||
2558 | while (iterStart != iterEnd) { | ||
2559 | if (!isRetiredBlock(*iterStart)) { | ||
2560 | ++count; | ||
2561 | } | ||
2562 | ++iterStart; | ||
2563 | } | ||
2564 | |||
2565 | return count; | ||
2566 | } //countActiveBlock | ||
2567 | |||
// This is work around solution for findNearestCommonDominator not avaiable to
// post dom a proper fix should go to Dominators.h.
//
// Returns the nearest common post-dominator of blk1 and blk2, or NULL when
// one cannot be determined (e.g. a block has no post-dom-tree node and no
// unique successor to retry through).
template<class PassT>
typename CFGStructurizer<PassT>::BlockT*
CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {

  // Fast paths: one block already post-dominates the other.
  if (postDomTree->dominates(blk1, blk2)) {
    return blk1;
  }
  if (postDomTree->dominates(blk2, blk1)) {
    return blk2;
  }

  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
  DomTreeNodeT *node2 = postDomTree->getNode(blk2);

  // Handle newly cloned node: blocks created after the post-dom tree was
  // built have no node; retry from their unique successor instead.
  if (node1 == NULL && blk1->succ_size() == 1) {
    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
  }
  if (node2 == NULL && blk2->succ_size() == 1) {
    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
  }

  if (node1 == NULL || node2 == NULL) {
    return NULL;
  }

  // Walk up blk1's immediate-post-dominator chain until we find an
  // ancestor that also post-dominates blk2.
  node1 = node1->getIDom();
  while (node1) {
    if (postDomTree->dominates(node1, node2)) {
      return node1->getBlock();
    }
    node1 = node1->getIDom();
  }

  return NULL;
}
2607 | |||
2608 | template<class PassT> | ||
2609 | typename CFGStructurizer<PassT>::BlockT * | ||
2610 | CFGStructurizer<PassT>::findNearestCommonPostDom | ||
2611 | (typename std::set<BlockT *> &blks) { | ||
2612 | BlockT *commonDom; | ||
2613 | typename std::set<BlockT *>::const_iterator iter = blks.begin(); | ||
2614 | typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); | ||
2615 | for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { | ||
2616 | BlockT *curBlk = *iter; | ||
2617 | if (curBlk != commonDom) { | ||
2618 | commonDom = findNearestCommonPostDom(curBlk, commonDom); | ||
2619 | } | ||
2620 | } | ||
2621 | |||
2622 | if (DEBUGME) { | ||
2623 | errs() << "Common post dominator for exit blocks is "; | ||
2624 | if (commonDom) { | ||
2625 | errs() << "BB" << commonDom->getNumber() << "\n"; | ||
2626 | } else { | ||
2627 | errs() << "NULL\n"; | ||
2628 | } | ||
2629 | } | ||
2630 | |||
2631 | return commonDom; | ||
2632 | } //findNearestCommonPostDom | ||
2633 | |||
2634 | } //end namespace llvm | ||
2635 | |||
2636 | //todo: move-end | ||
2637 | |||
2638 | |||
2639 | //===----------------------------------------------------------------------===// | ||
2640 | // | ||
2641 | // CFGStructurizer for AMDGPU | ||
2642 | // | ||
2643 | //===----------------------------------------------------------------------===// | ||
2644 | |||
2645 | |||
2646 | using namespace llvmCFGStruct; | ||
2647 | |||
namespace llvm
{
// Common base class for the two AMDGPU CFG-structurizing machine passes
// declared below (AMDGPUCFGPrepare and AMDGPUCFGPerform).  It exports the
// typedefs that the templated CFGStructurizer / CFGStructTraits expect and
// caches the target hooks (instruction and register info) taken from the
// TargetMachine at construction time.
class AMDGPUCFGStructurizer : public MachineFunctionPass
{
public:
  typedef MachineInstr InstructionType;
  typedef MachineFunction FunctionType;
  typedef MachineBasicBlock BlockType;
  typedef MachineLoopInfo LoopinfoType;
  typedef MachineDominatorTree DominatortreeType;
  typedef MachinePostDominatorTree PostDominatortreeType;
  typedef MachineDomTreeNode DomTreeNodeType;
  typedef MachineLoop LoopType;

protected:
  TargetMachine &TM;               // owning target machine
  const TargetInstrInfo *TII;      // cached from TM in the constructor
  const AMDGPURegisterInfo *TRI;   // cached from TM in the constructor

public:
  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
  const TargetInstrInfo *getTargetInstrInfo() const;
  //bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGStructurizer

//char AMDGPUCFGStructurizer::ID = 0;
} //end of namespace llvm
2678 | AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm | ||
2679 | ) | ||
2680 | : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), | ||
2681 | TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo()) | ||
2682 | ) { | ||
2683 | } | ||
2684 | |||
// Accessor for the cached target instruction info.
const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
  return TII;
}
2688 | //===----------------------------------------------------------------------===// | ||
2689 | // | ||
2690 | // CFGPrepare | ||
2691 | // | ||
2692 | //===----------------------------------------------------------------------===// | ||
2693 | |||
2694 | |||
2695 | using namespace llvmCFGStruct; | ||
2696 | |||
namespace llvm
{
// Machine pass running the CFG structurizer's "prepare" phase.
class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
{
public:
  static char ID;  // pass identification

public:
  AMDGPUCFGPrepare(TargetMachine &tm);

  virtual const char *getPassName() const;
  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGPrepare

char AMDGPUCFGPrepare::ID = 0;
} //end of namespace llvm
2718 | |||
2719 | AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) | ||
2720 | : AMDGPUCFGStructurizer(ID, tm ) | ||
2721 | { | ||
2722 | } | ||
// Human-readable pass name shown by -debug-pass and similar tooling.
const char *AMDGPUCFGPrepare::getPassName() const {
  return "AMD IL Control Flow Graph Preparation Pass";
}
2726 | |||
// Declare the CFG analyses the structurizer consumes (dominators,
// post-dominators, loop info) and preserve the MachineFunction analysis.
void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreserved<MachineFunctionAnalysis>();
  AU.addRequired<MachineFunctionAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachinePostDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
}
2734 | |||
2735 | //===----------------------------------------------------------------------===// | ||
2736 | // | ||
2737 | // CFGPerform | ||
2738 | // | ||
2739 | //===----------------------------------------------------------------------===// | ||
2740 | |||
2741 | |||
2742 | using namespace llvmCFGStruct; | ||
2743 | |||
namespace llvm
{
// Machine pass running the actual CFG structurization phase.
class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
{
public:
  static char ID;  // pass identification

public:
  AMDGPUCFGPerform(TargetMachine &tm);
  virtual const char *getPassName() const;
  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  bool runOnMachineFunction(MachineFunction &F);

private:

}; //end of class AMDGPUCFGPerform

char AMDGPUCFGPerform::ID = 0;
} //end of namespace llvm
2763 | |||
2764 | AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) | ||
2765 | : AMDGPUCFGStructurizer(ID, tm) | ||
2766 | { | ||
2767 | } | ||
2768 | |||
// Human-readable pass name shown by -debug-pass and similar tooling.
const char *AMDGPUCFGPerform::getPassName() const {
  return "AMD IL Control Flow Graph structurizer Pass";
}
2772 | |||
// Declare the CFG analyses the structurizer consumes (dominators,
// post-dominators, loop info) and preserve the MachineFunction analysis.
void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreserved<MachineFunctionAnalysis>();
  AU.addRequired<MachineFunctionAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachinePostDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
}
2780 | |||
2781 | //===----------------------------------------------------------------------===// | ||
2782 | // | ||
2783 | // CFGStructTraits<AMDGPUCFGStructurizer> | ||
2784 | // | ||
2785 | //===----------------------------------------------------------------------===// | ||
2786 | |||
2787 | namespace llvmCFGStruct | ||
2788 | { | ||
2789 | // this class is tailor to the AMDGPU backend | ||
2790 | template<> | ||
2791 | struct CFGStructTraits<AMDGPUCFGStructurizer> | ||
2792 | { | ||
2793 | typedef int RegiT; | ||
2794 | |||
2795 | static int getBreakNzeroOpcode(int oldOpcode) { | ||
2796 | switch(oldOpcode) { | ||
2797 | case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32; | ||
2798 | default: | ||
2799 | assert(0 && "internal error"); | ||
2800 | }; | ||
2801 | return -1; | ||
2802 | } | ||
2803 | |||
2804 | static int getBreakZeroOpcode(int oldOpcode) { | ||
2805 | switch(oldOpcode) { | ||
2806 | case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32; | ||
2807 | default: | ||
2808 | assert(0 && "internal error"); | ||
2809 | }; | ||
2810 | return -1; | ||
2811 | } | ||
2812 | |||
static int getBranchNzeroOpcode(int oldOpcode) {
  // Map a branch opcode to the structured "if non-zero" opcode.
  // ExpandCaseToAllScalarReturn expands to one case-with-return per scalar
  // type suffix, so this switch must keep the macro invocation intact.
  switch(oldOpcode) {
  case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
  ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
  case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
  default:
    assert(0 && "internal error");
  };
  return -1;
}
2823 | |||
static int getBranchZeroOpcode(int oldOpcode) {
  // Map a branch opcode to the structured "if zero" opcode.
  // ExpandCaseToAllScalarReturn expands to one case-with-return per scalar
  // type suffix, so this switch must keep the macro invocation intact.
  switch(oldOpcode) {
  case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
  ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
  case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
  default:
    assert(0 && "internal error");
  };
  return -1;
}
2834 | |||
2835 | static int getContinueNzeroOpcode(int oldOpcode) | ||
2836 | { | ||
2837 | switch(oldOpcode) { | ||
2838 | case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; | ||
2839 | default: | ||
2840 | assert(0 && "internal error"); | ||
2841 | }; | ||
2842 | return -1; | ||
2843 | } | ||
2844 | |||
2845 | static int getContinueZeroOpcode(int oldOpcode) { | ||
2846 | switch(oldOpcode) { | ||
2847 | case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; | ||
2848 | default: | ||
2849 | assert(0 && "internal error"); | ||
2850 | }; | ||
2851 | return -1; | ||
2852 | } | ||
2853 | |||
2854 | // the explicitly represented branch target is the true branch target | ||
2855 | #define getExplicitBranch getTrueBranch | ||
2856 | #define setExplicitBranch setTrueBranch | ||
2857 | |||
2858 | static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { | ||
2859 | return instr->getOperand(0).getMBB(); | ||
2860 | } | ||
2861 | |||
2862 | static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { | ||
2863 | instr->getOperand(0).setMBB(blk); | ||
2864 | } | ||
2865 | |||
2866 | static MachineBasicBlock * | ||
2867 | getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { | ||
2868 | assert(blk->succ_size() == 2); | ||
2869 | MachineBasicBlock *trueBranch = getTrueBranch(instr); | ||
2870 | MachineBasicBlock::succ_iterator iter = blk->succ_begin(); | ||
2871 | MachineBasicBlock::succ_iterator iterNext = iter; | ||
2872 | ++iterNext; | ||
2873 | |||
2874 | return (*iter == trueBranch) ? *iterNext : *iter; | ||
2875 | } | ||
2876 | |||
2877 | static bool isCondBranch(MachineInstr *instr) { | ||
2878 | switch (instr->getOpcode()) { | ||
2879 | case AMDGPU::JUMP: | ||
2880 | return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; | ||
2881 | ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); | ||
2882 | case AMDGPU::SI_IF_NZ: | ||
2883 | case AMDGPU::SI_IF_Z: | ||
2884 | break; | ||
2885 | default: | ||
2886 | return false; | ||
2887 | } | ||
2888 | return true; | ||
2889 | } | ||
2890 | |||
2891 | static bool isUncondBranch(MachineInstr *instr) { | ||
2892 | switch (instr->getOpcode()) { | ||
2893 | case AMDGPU::JUMP: | ||
2894 | return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; | ||
2895 | case AMDGPU::BRANCH: | ||
2896 | return true; | ||
2897 | default: | ||
2898 | return false; | ||
2899 | } | ||
2900 | return true; | ||
2901 | } | ||
2902 | |||
2903 | static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { | ||
2904 | //get DebugLoc from the first MachineBasicBlock instruction with debug info | ||
2905 | DebugLoc DL; | ||
2906 | for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { | ||
2907 | MachineInstr *instr = &(*iter); | ||
2908 | if (instr->getDebugLoc().isUnknown() == false) { | ||
2909 | DL = instr->getDebugLoc(); | ||
2910 | } | ||
2911 | } | ||
2912 | return DL; | ||
2913 | } | ||
2914 | |||
2915 | static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { | ||
2916 | MachineBasicBlock::reverse_iterator iter = blk->rbegin(); | ||
2917 | MachineInstr *instr = &*iter; | ||
2918 | if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { | ||
2919 | return instr; | ||
2920 | } | ||
2921 | return NULL; | ||
2922 | } | ||
2923 | |||
2924 | // The correct naming for this is getPossibleLoopendBlockBranchInstr. | ||
2925 | // | ||
2926 | // BB with backward-edge could have move instructions after the branch | ||
2927 | // instruction. Such move instruction "belong to" the loop backward-edge. | ||
2928 | // | ||
2929 | static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { | ||
2930 | const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>( | ||
2931 | blk->getParent()->getTarget().getInstrInfo()); | ||
2932 | |||
2933 | for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), | ||
2934 | iterEnd = blk->rend(); iter != iterEnd; ++iter) { | ||
2935 | // FIXME: Simplify | ||
2936 | MachineInstr *instr = &*iter; | ||
2937 | if (instr) { | ||
2938 | if (isCondBranch(instr) || isUncondBranch(instr)) { | ||
2939 | return instr; | ||
2940 | } else if (!TII->isMov(instr->getOpcode())) { | ||
2941 | break; | ||
2942 | } | ||
2943 | } | ||
2944 | } | ||
2945 | return NULL; | ||
2946 | } | ||
2947 | |||
2948 | static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { | ||
2949 | MachineBasicBlock::reverse_iterator iter = blk->rbegin(); | ||
2950 | if (iter != blk->rend()) { | ||
2951 | MachineInstr *instr = &(*iter); | ||
2952 | if (instr->getOpcode() == AMDGPU::RETURN) { | ||
2953 | return instr; | ||
2954 | } | ||
2955 | } | ||
2956 | return NULL; | ||
2957 | } | ||
2958 | |||
2959 | static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { | ||
2960 | MachineBasicBlock::reverse_iterator iter = blk->rbegin(); | ||
2961 | if (iter != blk->rend()) { | ||
2962 | MachineInstr *instr = &(*iter); | ||
2963 | if (instr->getOpcode() == AMDGPU::CONTINUE) { | ||
2964 | return instr; | ||
2965 | } | ||
2966 | } | ||
2967 | return NULL; | ||
2968 | } | ||
2969 | |||
2970 | static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { | ||
2971 | for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { | ||
2972 | MachineInstr *instr = &(*iter); | ||
2973 | if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) { | ||
2974 | return instr; | ||
2975 | } | ||
2976 | } | ||
2977 | return NULL; | ||
2978 | } | ||
2979 | |||
2980 | static bool isReturnBlock(MachineBasicBlock *blk) { | ||
2981 | MachineInstr *instr = getReturnInstr(blk); | ||
2982 | bool isReturn = (blk->succ_size() == 0); | ||
2983 | if (instr) { | ||
2984 | assert(isReturn); | ||
2985 | } else if (isReturn) { | ||
2986 | if (DEBUGME) { | ||
2987 | errs() << "BB" << blk->getNumber() | ||
2988 | <<" is return block without RETURN instr\n"; | ||
2989 | } | ||
2990 | } | ||
2991 | |||
2992 | return isReturn; | ||
2993 | } | ||
2994 | |||
2995 | static MachineBasicBlock::iterator | ||
2996 | getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { | ||
2997 | assert(instr->getParent() == blk && "instruction doesn't belong to block"); | ||
2998 | MachineBasicBlock::iterator iter = blk->begin(); | ||
2999 | MachineBasicBlock::iterator iterEnd = blk->end(); | ||
3000 | while (&(*iter) != instr && iter != iterEnd) { | ||
3001 | ++iter; | ||
3002 | } | ||
3003 | |||
3004 | assert(iter != iterEnd); | ||
3005 | return iter; | ||
3006 | }//getInstrPos | ||
3007 | |||
3008 | static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, | ||
3009 | AMDGPUCFGStructurizer *passRep) { | ||
3010 | return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); | ||
3011 | } //insertInstrBefore | ||
3012 | |||
3013 | static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, | ||
3014 | AMDGPUCFGStructurizer *passRep, DebugLoc DL) { | ||
3015 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3016 | MachineInstr *newInstr = | ||
3017 | blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); | ||
3018 | |||
3019 | MachineBasicBlock::iterator res; | ||
3020 | if (blk->begin() != blk->end()) { | ||
3021 | blk->insert(blk->begin(), newInstr); | ||
3022 | } else { | ||
3023 | blk->push_back(newInstr); | ||
3024 | } | ||
3025 | |||
3026 | SHOWNEWINSTR(newInstr); | ||
3027 | |||
3028 | return newInstr; | ||
3029 | } //insertInstrBefore | ||
3030 | |||
3031 | static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, | ||
3032 | AMDGPUCFGStructurizer *passRep) { | ||
3033 | insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); | ||
3034 | } //insertInstrEnd | ||
3035 | |||
3036 | static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, | ||
3037 | AMDGPUCFGStructurizer *passRep, DebugLoc DL) { | ||
3038 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3039 | MachineInstr *newInstr = blk->getParent() | ||
3040 | ->CreateMachineInstr(tii->get(newOpcode), DL); | ||
3041 | |||
3042 | blk->push_back(newInstr); | ||
3043 | //assume the instruction doesn't take any reg operand ... | ||
3044 | |||
3045 | SHOWNEWINSTR(newInstr); | ||
3046 | } //insertInstrEnd | ||
3047 | |||
3048 | static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, | ||
3049 | int newOpcode, | ||
3050 | AMDGPUCFGStructurizer *passRep) { | ||
3051 | MachineInstr *oldInstr = &(*instrPos); | ||
3052 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3053 | MachineBasicBlock *blk = oldInstr->getParent(); | ||
3054 | MachineInstr *newInstr = | ||
3055 | blk->getParent()->CreateMachineInstr(tii->get(newOpcode), | ||
3056 | DebugLoc()); | ||
3057 | |||
3058 | blk->insert(instrPos, newInstr); | ||
3059 | //assume the instruction doesn't take any reg operand ... | ||
3060 | |||
3061 | SHOWNEWINSTR(newInstr); | ||
3062 | return newInstr; | ||
3063 | } //insertInstrBefore | ||
3064 | |||
3065 | static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, | ||
3066 | int newOpcode, | ||
3067 | AMDGPUCFGStructurizer *passRep, | ||
3068 | DebugLoc DL) { | ||
3069 | MachineInstr *oldInstr = &(*instrPos); | ||
3070 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3071 | MachineBasicBlock *blk = oldInstr->getParent(); | ||
3072 | MachineInstr *newInstr = | ||
3073 | blk->getParent()->CreateMachineInstr(tii->get(newOpcode), | ||
3074 | DL); | ||
3075 | |||
3076 | blk->insert(instrPos, newInstr); | ||
3077 | MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), | ||
3078 | false); | ||
3079 | |||
3080 | SHOWNEWINSTR(newInstr); | ||
3081 | //erase later oldInstr->eraseFromParent(); | ||
3082 | } //insertCondBranchBefore | ||
3083 | |||
3084 | static void insertCondBranchBefore(MachineBasicBlock *blk, | ||
3085 | MachineBasicBlock::iterator insertPos, | ||
3086 | int newOpcode, | ||
3087 | AMDGPUCFGStructurizer *passRep, | ||
3088 | RegiT regNum, | ||
3089 | DebugLoc DL) { | ||
3090 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3091 | |||
3092 | MachineInstr *newInstr = | ||
3093 | blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); | ||
3094 | |||
3095 | //insert before | ||
3096 | blk->insert(insertPos, newInstr); | ||
3097 | MachineInstrBuilder(newInstr).addReg(regNum, false); | ||
3098 | |||
3099 | SHOWNEWINSTR(newInstr); | ||
3100 | } //insertCondBranchBefore | ||
3101 | |||
3102 | static void insertCondBranchEnd(MachineBasicBlock *blk, | ||
3103 | int newOpcode, | ||
3104 | AMDGPUCFGStructurizer *passRep, | ||
3105 | RegiT regNum) { | ||
3106 | const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); | ||
3107 | MachineInstr *newInstr = | ||
3108 | blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); | ||
3109 | |||
3110 | blk->push_back(newInstr); | ||
3111 | MachineInstrBuilder(newInstr).addReg(regNum, false); | ||
3112 | |||
3113 | SHOWNEWINSTR(newInstr); | ||
3114 | } //insertCondBranchEnd | ||
3115 | |||
3116 | |||
3117 | static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, | ||
3118 | AMDGPUCFGStructurizer *passRep, | ||
3119 | RegiT regNum, int regVal) { | ||
3120 | MachineInstr *oldInstr = &(*instrPos); | ||
3121 | const AMDGPUInstrInfo *tii = | ||
3122 | static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); | ||
3123 | MachineBasicBlock *blk = oldInstr->getParent(); | ||
3124 | MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, | ||
3125 | regVal); | ||
3126 | blk->insert(instrPos, newInstr); | ||
3127 | |||
3128 | SHOWNEWINSTR(newInstr); | ||
3129 | } //insertAssignInstrBefore | ||
3130 | |||
3131 | static void insertAssignInstrBefore(MachineBasicBlock *blk, | ||
3132 | AMDGPUCFGStructurizer *passRep, | ||
3133 | RegiT regNum, int regVal) { | ||
3134 | const AMDGPUInstrInfo *tii = | ||
3135 | static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); | ||
3136 | |||
3137 | MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, | ||
3138 | regVal); | ||
3139 | if (blk->begin() != blk->end()) { | ||
3140 | blk->insert(blk->begin(), newInstr); | ||
3141 | } else { | ||
3142 | blk->push_back(newInstr); | ||
3143 | } | ||
3144 | |||
3145 | SHOWNEWINSTR(newInstr); | ||
3146 | |||
3147 | } //insertInstrBefore | ||
3148 | |||
3149 | static void insertCompareInstrBefore(MachineBasicBlock *blk, | ||
3150 | MachineBasicBlock::iterator instrPos, | ||
3151 | AMDGPUCFGStructurizer *passRep, | ||
3152 | RegiT dstReg, RegiT src1Reg, | ||
3153 | RegiT src2Reg) { | ||
3154 | const AMDGPUInstrInfo *tii = | ||
3155 | static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); | ||
3156 | MachineInstr *newInstr = | ||
3157 | blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); | ||
3158 | |||
3159 | MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target | ||
3160 | MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value | ||
3161 | MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value | ||
3162 | |||
3163 | blk->insert(instrPos, newInstr); | ||
3164 | SHOWNEWINSTR(newInstr); | ||
3165 | |||
3166 | } //insertCompareInstrBefore | ||
3167 | |||
3168 | static void cloneSuccessorList(MachineBasicBlock *dstBlk, | ||
3169 | MachineBasicBlock *srcBlk) { | ||
3170 | for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), | ||
3171 | iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { | ||
3172 | dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of | ||
3173 | } | ||
3174 | } //cloneSuccessorList | ||
3175 | |||
3176 | static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { | ||
3177 | MachineFunction *func = srcBlk->getParent(); | ||
3178 | MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); | ||
3179 | func->push_back(newBlk); //insert to function | ||
3180 | //newBlk->setNumber(srcBlk->getNumber()); | ||
3181 | for (MachineBasicBlock::iterator iter = srcBlk->begin(), | ||
3182 | iterEnd = srcBlk->end(); | ||
3183 | iter != iterEnd; ++iter) { | ||
3184 | MachineInstr *instr = func->CloneMachineInstr(iter); | ||
3185 | newBlk->push_back(instr); | ||
3186 | } | ||
3187 | return newBlk; | ||
3188 | } | ||
3189 | |||
3190 | //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because | ||
3191 | //the AMDGPU instruction is not recognized as terminator fix this and retire | ||
3192 | //this routine | ||
3193 | static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, | ||
3194 | MachineBasicBlock *oldBlk, | ||
3195 | MachineBasicBlock *newBlk) { | ||
3196 | MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); | ||
3197 | if (branchInstr && isCondBranch(branchInstr) && | ||
3198 | getExplicitBranch(branchInstr) == oldBlk) { | ||
3199 | setExplicitBranch(branchInstr, newBlk); | ||
3200 | } | ||
3201 | } | ||
3202 | |||
3203 | static void wrapup(MachineBasicBlock *entryBlk) { | ||
3204 | assert((!entryBlk->getParent()->getJumpTableInfo() | ||
3205 | || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) | ||
3206 | && "found a jump table"); | ||
3207 | |||
3208 | //collect continue right before endloop | ||
3209 | SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; | ||
3210 | MachineBasicBlock::iterator pre = entryBlk->begin(); | ||
3211 | MachineBasicBlock::iterator iterEnd = entryBlk->end(); | ||
3212 | MachineBasicBlock::iterator iter = pre; | ||
3213 | while (iter != iterEnd) { | ||
3214 | if (pre->getOpcode() == AMDGPU::CONTINUE | ||
3215 | && iter->getOpcode() == AMDGPU::ENDLOOP) { | ||
3216 | contInstr.push_back(pre); | ||
3217 | } | ||
3218 | pre = iter; | ||
3219 | ++iter; | ||
3220 | } //end while | ||
3221 | |||
3222 | //delete continue right before endloop | ||
3223 | for (unsigned i = 0; i < contInstr.size(); ++i) { | ||
3224 | contInstr[i]->eraseFromParent(); | ||
3225 | } | ||
3226 | |||
3227 | // TODO to fix up jump table so later phase won't be confused. if | ||
3228 | // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but | ||
3229 | // there isn't such an interface yet. alternatively, replace all the other | ||
3230 | // blocks in the jump table with the entryBlk //} | ||
3231 | |||
3232 | } //wrapup | ||
3233 | |||
3234 | static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { | ||
3235 | return &pass.getAnalysis<MachineDominatorTree>(); | ||
3236 | } | ||
3237 | |||
3238 | static MachinePostDominatorTree* | ||
3239 | getPostDominatorTree(AMDGPUCFGStructurizer &pass) { | ||
3240 | return &pass.getAnalysis<MachinePostDominatorTree>(); | ||
3241 | } | ||
3242 | |||
3243 | static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { | ||
3244 | return &pass.getAnalysis<MachineLoopInfo>(); | ||
3245 | } | ||
3246 | }; // template class CFGStructTraits | ||
3247 | } //end of namespace llvm | ||
3248 | |||
3249 | // createAMDGPUCFGPreparationPass- Returns a pass | ||
3250 | FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm | ||
3251 | ) { | ||
3252 | return new AMDGPUCFGPrepare(tm ); | ||
3253 | } | ||
3254 | |||
3255 | bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { | ||
3256 | return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, | ||
3257 | *this, | ||
3258 | TRI); | ||
3259 | } | ||
3260 | |||
3261 | // createAMDGPUCFGStructurizerPass- Returns a pass | ||
3262 | FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm | ||
3263 | ) { | ||
3264 | return new AMDGPUCFGPerform(tm ); | ||
3265 | } | ||
3266 | |||
3267 | bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { | ||
3268 | return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func, | ||
3269 | *this, | ||
3270 | TRI); | ||
3271 | } | ||
3272 | |||
3273 | //end of file newline goes below | ||
3274 | |||
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp deleted file mode 100644 index 3955828ec31..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevice.cpp +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | //===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #include "AMDILDevice.h" | ||
10 | #include "AMDGPUSubtarget.h" | ||
11 | |||
12 | using namespace llvm; | ||
13 | // Default implementation for all of the classes. | ||
14 | AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) | ||
15 | { | ||
16 | mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); | ||
17 | mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); | ||
18 | setCaps(); | ||
19 | mDeviceFlag = OCL_DEVICE_ALL; | ||
20 | } | ||
21 | |||
22 | AMDGPUDevice::~AMDGPUDevice() | ||
23 | { | ||
24 | mHWBits.clear(); | ||
25 | mSWBits.clear(); | ||
26 | } | ||
27 | |||
28 | size_t AMDGPUDevice::getMaxGDSSize() const | ||
29 | { | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | uint32_t | ||
34 | AMDGPUDevice::getDeviceFlag() const | ||
35 | { | ||
36 | return mDeviceFlag; | ||
37 | } | ||
38 | |||
39 | size_t AMDGPUDevice::getMaxNumCBs() const | ||
40 | { | ||
41 | if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { | ||
42 | return HW_MAX_NUM_CB; | ||
43 | } | ||
44 | |||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | size_t AMDGPUDevice::getMaxCBSize() const | ||
49 | { | ||
50 | if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { | ||
51 | return MAX_CB_SIZE; | ||
52 | } | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | size_t AMDGPUDevice::getMaxScratchSize() const | ||
58 | { | ||
59 | return 65536; | ||
60 | } | ||
61 | |||
62 | uint32_t AMDGPUDevice::getStackAlignment() const | ||
63 | { | ||
64 | return 16; | ||
65 | } | ||
66 | |||
67 | void AMDGPUDevice::setCaps() | ||
68 | { | ||
69 | mSWBits.set(AMDGPUDeviceInfo::HalfOps); | ||
70 | mSWBits.set(AMDGPUDeviceInfo::ByteOps); | ||
71 | mSWBits.set(AMDGPUDeviceInfo::ShortOps); | ||
72 | mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); | ||
73 | if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) { | ||
74 | mSWBits.set(AMDGPUDeviceInfo::NoInline); | ||
75 | } | ||
76 | if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) { | ||
77 | mSWBits.set(AMDGPUDeviceInfo::MacroDB); | ||
78 | } | ||
79 | if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { | ||
80 | mSWBits.set(AMDGPUDeviceInfo::ConstantMem); | ||
81 | } else { | ||
82 | mHWBits.set(AMDGPUDeviceInfo::ConstantMem); | ||
83 | } | ||
84 | if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { | ||
85 | mSWBits.set(AMDGPUDeviceInfo::PrivateMem); | ||
86 | } else { | ||
87 | mHWBits.set(AMDGPUDeviceInfo::PrivateMem); | ||
88 | } | ||
89 | if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) { | ||
90 | mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); | ||
91 | } | ||
92 | mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps); | ||
93 | mSWBits.set(AMDGPUDeviceInfo::LongOps); | ||
94 | } | ||
95 | |||
96 | AMDGPUDeviceInfo::ExecutionMode | ||
97 | AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const | ||
98 | { | ||
99 | if (mHWBits[Caps]) { | ||
100 | assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); | ||
101 | return AMDGPUDeviceInfo::Hardware; | ||
102 | } | ||
103 | |||
104 | if (mSWBits[Caps]) { | ||
105 | assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); | ||
106 | return AMDGPUDeviceInfo::Software; | ||
107 | } | ||
108 | |||
109 | return AMDGPUDeviceInfo::Unsupported; | ||
110 | |||
111 | } | ||
112 | |||
113 | bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const | ||
114 | { | ||
115 | return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported; | ||
116 | } | ||
117 | |||
118 | bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const | ||
119 | { | ||
120 | return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware; | ||
121 | } | ||
122 | |||
123 | bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const | ||
124 | { | ||
125 | return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software; | ||
126 | } | ||
127 | |||
128 | std::string | ||
129 | AMDGPUDevice::getDataLayout() const | ||
130 | { | ||
131 | return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" | ||
132 | "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" | ||
133 | "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" | ||
134 | "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" | ||
135 | "-v512:512:512-v1024:1024:1024-v2048:2048:2048" | ||
136 | "-n8:16:32:64"); | ||
137 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h deleted file mode 100644 index 864fa0a3455..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevice.h +++ /dev/null | |||
@@ -1,115 +0,0 @@ | |||
1 | //===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the subtarget data classes. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | // This file will define the interface that each generation needs to | ||
14 | // implement in order to correctly answer queries on the capabilities of the | ||
15 | // specific hardware. | ||
16 | //===----------------------------------------------------------------------===// | ||
17 | #ifndef _AMDILDEVICEIMPL_H_ | ||
18 | #define _AMDILDEVICEIMPL_H_ | ||
19 | #include "AMDIL.h" | ||
20 | #include "llvm/ADT/BitVector.h" | ||
21 | |||
22 | namespace llvm { | ||
23 | class AMDGPUSubtarget; | ||
24 | class MCStreamer; | ||
25 | //===----------------------------------------------------------------------===// | ||
26 | // Interface for data that is specific to a single device | ||
27 | //===----------------------------------------------------------------------===// | ||
28 | class AMDGPUDevice { | ||
29 | public: | ||
30 | AMDGPUDevice(AMDGPUSubtarget *ST); | ||
31 | virtual ~AMDGPUDevice(); | ||
32 | |||
33 | // Enum values for the various memory types. | ||
34 | enum { | ||
35 | RAW_UAV_ID = 0, | ||
36 | ARENA_UAV_ID = 1, | ||
37 | LDS_ID = 2, | ||
38 | GDS_ID = 3, | ||
39 | SCRATCH_ID = 4, | ||
40 | CONSTANT_ID = 5, | ||
41 | GLOBAL_ID = 6, | ||
42 | MAX_IDS = 7 | ||
43 | } IO_TYPE_IDS; | ||
44 | |||
45 | // Returns the max LDS size that the hardware supports. Size is in | ||
46 | // bytes. | ||
47 | virtual size_t getMaxLDSSize() const = 0; | ||
48 | |||
49 | // Returns the max GDS size that the hardware supports if the GDS is | ||
50 | // supported by the hardware. Size is in bytes. | ||
51 | virtual size_t getMaxGDSSize() const; | ||
52 | |||
53 | // Returns the max number of hardware constant address spaces that | ||
54 | // are supported by this device. | ||
55 | virtual size_t getMaxNumCBs() const; | ||
56 | |||
57 | // Returns the max number of bytes a single hardware constant buffer | ||
58 | // can support. Size is in bytes. | ||
59 | virtual size_t getMaxCBSize() const; | ||
60 | |||
61 | // Returns the max number of bytes allowed by the hardware scratch | ||
62 | // buffer. Size is in bytes. | ||
63 | virtual size_t getMaxScratchSize() const; | ||
64 | |||
65 | // Get the flag that corresponds to the device. | ||
66 | virtual uint32_t getDeviceFlag() const; | ||
67 | |||
68 | // Returns the number of work-items that exist in a single hardware | ||
69 | // wavefront. | ||
70 | virtual size_t getWavefrontSize() const = 0; | ||
71 | |||
72 | // Get the generational name of this specific device. | ||
73 | virtual uint32_t getGeneration() const = 0; | ||
74 | |||
75 | // Get the stack alignment of this specific device. | ||
76 | virtual uint32_t getStackAlignment() const; | ||
77 | |||
78 | // Get the resource ID for this specific device. | ||
79 | virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; | ||
80 | |||
81 | // Get the max number of UAV's for this device. | ||
82 | virtual uint32_t getMaxNumUAVs() const = 0; | ||
83 | |||
84 | // API utilizing more detailed capabilities of each family of | ||
85 | // cards. If a capability is supported, then either usesHardware or | ||
86 | // usesSoftware returned true. If usesHardware returned true, then | ||
87 | // usesSoftware must return false for the same capability. Hardware | ||
88 | // execution means that the feature is done natively by the hardware | ||
89 | // and is not emulated by the softare. Software execution means | ||
90 | // that the feature could be done in the hardware, but there is | ||
91 | // software that emulates it with possibly using the hardware for | ||
92 | // support since the hardware does not fully comply with OpenCL | ||
93 | // specs. | ||
94 | bool isSupported(AMDGPUDeviceInfo::Caps Mode) const; | ||
95 | bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const; | ||
96 | bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const; | ||
97 | virtual std::string getDataLayout() const; | ||
98 | static const unsigned int MAX_LDS_SIZE_700 = 16384; | ||
99 | static const unsigned int MAX_LDS_SIZE_800 = 32768; | ||
100 | static const unsigned int WavefrontSize = 64; | ||
101 | static const unsigned int HalfWavefrontSize = 32; | ||
102 | static const unsigned int QuarterWavefrontSize = 16; | ||
103 | protected: | ||
104 | virtual void setCaps(); | ||
105 | llvm::BitVector mHWBits; | ||
106 | llvm::BitVector mSWBits; | ||
107 | AMDGPUSubtarget *mSTM; | ||
108 | uint32_t mDeviceFlag; | ||
109 | private: | ||
110 | AMDGPUDeviceInfo::ExecutionMode | ||
111 | getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const; | ||
112 | }; // AMDILDevice | ||
113 | |||
114 | } // namespace llvm | ||
115 | #endif // _AMDILDEVICEIMPL_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp deleted file mode 100644 index b2f7cfb3092..00000000000 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | //===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Function that creates DeviceInfo from a device name and other information. | ||
11 | // | ||
12 | //==-----------------------------------------------------------------------===// | ||
13 | #include "AMDILDevices.h" | ||
14 | #include "AMDGPUSubtarget.h" | ||
15 | |||
16 | using namespace llvm; | ||
17 | namespace llvm { | ||
18 | namespace AMDGPUDeviceInfo { | ||
19 | AMDGPUDevice* | ||
20 | getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr, | ||
21 | bool is64bit, bool is64on32bit) | ||
22 | { | ||
23 | if (deviceName.c_str()[2] == '7') { | ||
24 | switch (deviceName.c_str()[3]) { | ||
25 | case '1': | ||
26 | return new AMDGPU710Device(ptr); | ||
27 | case '7': | ||
28 | return new AMDGPU770Device(ptr); | ||
29 | default: | ||
30 | return new AMDGPU7XXDevice(ptr); | ||
31 | }; | ||
32 | } else if (deviceName == "cypress") { | ||
33 | #if DEBUG | ||
34 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
35 | assert(!is64on32bit && "This device does not support 64bit" | ||
36 | " on 32bit pointers!"); | ||
37 | #endif | ||
38 | return new AMDGPUCypressDevice(ptr); | ||
39 | } else if (deviceName == "juniper") { | ||
40 | #if DEBUG | ||
41 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
42 | assert(!is64on32bit && "This device does not support 64bit" | ||
43 | " on 32bit pointers!"); | ||
44 | #endif | ||
45 | return new AMDGPUEvergreenDevice(ptr); | ||
46 | } else if (deviceName == "redwood") { | ||
47 | #if DEBUG | ||
48 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
49 | assert(!is64on32bit && "This device does not support 64bit" | ||
50 | " on 32bit pointers!"); | ||
51 | #endif | ||
52 | return new AMDGPURedwoodDevice(ptr); | ||
53 | } else if (deviceName == "cedar") { | ||
54 | #if DEBUG | ||
55 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
56 | assert(!is64on32bit && "This device does not support 64bit" | ||
57 | " on 32bit pointers!"); | ||
58 | #endif | ||
59 | return new AMDGPUCedarDevice(ptr); | ||
60 | } else if (deviceName == "barts" | ||
61 | || deviceName == "turks") { | ||
62 | #if DEBUG | ||
63 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
64 | assert(!is64on32bit && "This device does not support 64bit" | ||
65 | " on 32bit pointers!"); | ||
66 | #endif | ||
67 | return new AMDGPUNIDevice(ptr); | ||
68 | } else if (deviceName == "cayman") { | ||
69 | #if DEBUG | ||
70 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
71 | assert(!is64on32bit && "This device does not support 64bit" | ||
72 | " on 32bit pointers!"); | ||
73 | #endif | ||
74 | return new AMDGPUCaymanDevice(ptr); | ||
75 | } else if (deviceName == "caicos") { | ||
76 | #if DEBUG | ||
77 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
78 | assert(!is64on32bit && "This device does not support 64bit" | ||
79 | " on 32bit pointers!"); | ||
80 | #endif | ||
81 | return new AMDGPUNIDevice(ptr); | ||
82 | } else if (deviceName == "SI") { | ||
83 | return new AMDGPUSIDevice(ptr); | ||
84 | } else { | ||
85 | #if DEBUG | ||
86 | assert(!is64bit && "This device does not support 64bit pointers!"); | ||
87 | assert(!is64on32bit && "This device does not support 64bit" | ||
88 | " on 32bit pointers!"); | ||
89 | #endif | ||
90 | return new AMDGPU7XXDevice(ptr); | ||
91 | } | ||
92 | } | ||
93 | } // End namespace AMDGPUDeviceInfo | ||
94 | } // End namespace llvm | ||
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h deleted file mode 100644 index 4fa021e3599..00000000000 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | //===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #ifndef _AMDILDEVICEINFO_H_ | ||
10 | #define _AMDILDEVICEINFO_H_ | ||
11 | |||
12 | |||
13 | #include <string> | ||
14 | |||
15 | namespace llvm | ||
16 | { | ||
17 | class AMDGPUDevice; | ||
18 | class AMDGPUSubtarget; | ||
19 | namespace AMDGPUDeviceInfo | ||
20 | { | ||
21 | // Each Capabilities can be executed using a hardware instruction, | ||
22 | // emulated with a sequence of software instructions, or not | ||
23 | // supported at all. | ||
24 | enum ExecutionMode { | ||
25 | Unsupported = 0, // Unsupported feature on the card(Default value) | ||
26 | Software, // This is the execution mode that is set if the | ||
27 | // feature is emulated in software | ||
28 | Hardware // This execution mode is set if the feature exists | ||
29 | // natively in hardware | ||
30 | }; | ||
31 | |||
32 | // Any changes to this needs to have a corresponding update to the | ||
33 | // twiki page GPUMetadataABI | ||
34 | enum Caps { | ||
35 | HalfOps = 0x1, // Half float is supported or not. | ||
36 | DoubleOps = 0x2, // Double is supported or not. | ||
37 | ByteOps = 0x3, // Byte(char) is support or not. | ||
38 | ShortOps = 0x4, // Short is supported or not. | ||
39 | LongOps = 0x5, // Long is supported or not. | ||
40 | Images = 0x6, // Images are supported or not. | ||
41 | ByteStores = 0x7, // ByteStores available(!HD4XXX). | ||
42 | ConstantMem = 0x8, // Constant/CB memory. | ||
43 | LocalMem = 0x9, // Local/LDS memory. | ||
44 | PrivateMem = 0xA, // Scratch/Private/Stack memory. | ||
45 | RegionMem = 0xB, // OCL GDS Memory Extension. | ||
46 | FMA = 0xC, // Use HW FMA or SW FMA. | ||
47 | ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023. | ||
48 | MultiUAV = 0xE, // Use for UAV per Pointer 0-7. | ||
49 | Reserved0 = 0xF, // ReservedFlag | ||
50 | NoAlias = 0x10, // Cached loads. | ||
51 | Signed24BitOps = 0x11, // Peephole Optimization. | ||
52 | // Debug mode implies that no hardware features or optimizations | ||
53 | // are performned and that all memory access go through a single | ||
54 | // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). | ||
55 | Debug = 0x12, // Debug mode is enabled. | ||
56 | CachedMem = 0x13, // Cached mem is available or not. | ||
57 | BarrierDetect = 0x14, // Detect duplicate barriers. | ||
58 | Reserved1 = 0x15, // Reserved flag | ||
59 | ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available. | ||
60 | ArenaVectors = 0x17, // Flag to specify if vector loads from arena work. | ||
61 | TmrReg = 0x18, // Flag to specify if Tmr register is supported. | ||
62 | NoInline = 0x19, // Flag to specify that no inlining should occur. | ||
63 | MacroDB = 0x1A, // Flag to specify that backend handles macrodb. | ||
64 | HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod. | ||
65 | ArenaUAV = 0x1C, // Flag to specify that arena uav is supported. | ||
66 | PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's. | ||
67 | // If more capabilities are required, then | ||
68 | // this number needs to be increased. | ||
69 | // All capabilities must come before this | ||
70 | // number. | ||
71 | MaxNumberCapabilities = 0x20 | ||
72 | }; | ||
73 | // These have to be in order with the older generations | ||
74 | // having the lower number enumerations. | ||
75 | enum Generation { | ||
76 | HD4XXX = 0, // 7XX based devices. | ||
77 | HD5XXX, // Evergreen based devices. | ||
78 | HD6XXX, // NI/Evergreen+ based devices. | ||
79 | HD7XXX, | ||
80 | HDTEST, // Experimental feature testing device. | ||
81 | HDNUMGEN | ||
82 | }; | ||
83 | |||
84 | |||
85 | AMDGPUDevice* | ||
86 | getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr, | ||
87 | bool is64bit = false, bool is64on32bit = false); | ||
88 | } // namespace AMDILDeviceInfo | ||
89 | } // namespace llvm | ||
90 | #endif // _AMDILDEVICEINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h deleted file mode 100644 index cfcc3304b4b..00000000000 --- a/src/gallium/drivers/radeon/AMDILDevices.h +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | //===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #ifndef __AMDIL_DEVICES_H_ | ||
10 | #define __AMDIL_DEVICES_H_ | ||
11 | // Include all of the device specific header files | ||
12 | // This file is for Internal use only! | ||
13 | #include "AMDIL7XXDevice.h" | ||
14 | #include "AMDILDevice.h" | ||
15 | #include "AMDILEvergreenDevice.h" | ||
16 | #include "AMDILNIDevice.h" | ||
17 | #include "AMDILSIDevice.h" | ||
18 | |||
19 | #endif // _AMDIL_DEVICES_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp deleted file mode 100644 index 3532a28fe0f..00000000000 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp +++ /dev/null | |||
@@ -1,169 +0,0 @@ | |||
1 | //===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #include "AMDILEvergreenDevice.h" | ||
10 | |||
11 | using namespace llvm; | ||
12 | |||
13 | AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST) | ||
14 | : AMDGPUDevice(ST) { | ||
15 | setCaps(); | ||
16 | std::string name = ST->getDeviceName(); | ||
17 | if (name == "cedar") { | ||
18 | mDeviceFlag = OCL_DEVICE_CEDAR; | ||
19 | } else if (name == "redwood") { | ||
20 | mDeviceFlag = OCL_DEVICE_REDWOOD; | ||
21 | } else if (name == "cypress") { | ||
22 | mDeviceFlag = OCL_DEVICE_CYPRESS; | ||
23 | } else { | ||
24 | mDeviceFlag = OCL_DEVICE_JUNIPER; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() { | ||
29 | } | ||
30 | |||
31 | size_t AMDGPUEvergreenDevice::getMaxLDSSize() const { | ||
32 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
33 | return MAX_LDS_SIZE_800; | ||
34 | } else { | ||
35 | return 0; | ||
36 | } | ||
37 | } | ||
38 | size_t AMDGPUEvergreenDevice::getMaxGDSSize() const { | ||
39 | if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { | ||
40 | return MAX_LDS_SIZE_800; | ||
41 | } else { | ||
42 | return 0; | ||
43 | } | ||
44 | } | ||
45 | uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const { | ||
46 | return 12; | ||
47 | } | ||
48 | |||
49 | uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const { | ||
50 | switch(id) { | ||
51 | default: | ||
52 | assert(0 && "ID type passed in is unknown!"); | ||
53 | break; | ||
54 | case CONSTANT_ID: | ||
55 | case RAW_UAV_ID: | ||
56 | return GLOBAL_RETURN_RAW_UAV_ID; | ||
57 | case GLOBAL_ID: | ||
58 | case ARENA_UAV_ID: | ||
59 | return DEFAULT_ARENA_UAV_ID; | ||
60 | case LDS_ID: | ||
61 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
62 | return DEFAULT_LDS_ID; | ||
63 | } else { | ||
64 | return DEFAULT_ARENA_UAV_ID; | ||
65 | } | ||
66 | case GDS_ID: | ||
67 | if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { | ||
68 | return DEFAULT_GDS_ID; | ||
69 | } else { | ||
70 | return DEFAULT_ARENA_UAV_ID; | ||
71 | } | ||
72 | case SCRATCH_ID: | ||
73 | if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { | ||
74 | return DEFAULT_SCRATCH_ID; | ||
75 | } else { | ||
76 | return DEFAULT_ARENA_UAV_ID; | ||
77 | } | ||
78 | }; | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | size_t AMDGPUEvergreenDevice::getWavefrontSize() const { | ||
83 | return AMDGPUDevice::WavefrontSize; | ||
84 | } | ||
85 | |||
86 | uint32_t AMDGPUEvergreenDevice::getGeneration() const { | ||
87 | return AMDGPUDeviceInfo::HD5XXX; | ||
88 | } | ||
89 | |||
90 | void AMDGPUEvergreenDevice::setCaps() { | ||
91 | mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); | ||
92 | mHWBits.set(AMDGPUDeviceInfo::ArenaUAV); | ||
93 | mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); | ||
94 | mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod); | ||
95 | mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps); | ||
96 | if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) { | ||
97 | mHWBits.set(AMDGPUDeviceInfo::ByteStores); | ||
98 | } | ||
99 | if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { | ||
100 | mSWBits.set(AMDGPUDeviceInfo::LocalMem); | ||
101 | mSWBits.set(AMDGPUDeviceInfo::RegionMem); | ||
102 | } else { | ||
103 | mHWBits.set(AMDGPUDeviceInfo::LocalMem); | ||
104 | mHWBits.set(AMDGPUDeviceInfo::RegionMem); | ||
105 | } | ||
106 | mHWBits.set(AMDGPUDeviceInfo::Images); | ||
107 | if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) { | ||
108 | mHWBits.set(AMDGPUDeviceInfo::NoAlias); | ||
109 | } | ||
110 | mHWBits.set(AMDGPUDeviceInfo::CachedMem); | ||
111 | if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) { | ||
112 | mHWBits.set(AMDGPUDeviceInfo::MultiUAV); | ||
113 | } | ||
114 | mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps); | ||
115 | mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps); | ||
116 | mHWBits.set(AMDGPUDeviceInfo::ArenaVectors); | ||
117 | mHWBits.set(AMDGPUDeviceInfo::LongOps); | ||
118 | mSWBits.reset(AMDGPUDeviceInfo::LongOps); | ||
119 | mHWBits.set(AMDGPUDeviceInfo::TmrReg); | ||
120 | } | ||
121 | |||
122 | AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST) | ||
123 | : AMDGPUEvergreenDevice(ST) { | ||
124 | setCaps(); | ||
125 | } | ||
126 | |||
127 | AMDGPUCypressDevice::~AMDGPUCypressDevice() { | ||
128 | } | ||
129 | |||
130 | void AMDGPUCypressDevice::setCaps() { | ||
131 | if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { | ||
132 | mHWBits.set(AMDGPUDeviceInfo::DoubleOps); | ||
133 | mHWBits.set(AMDGPUDeviceInfo::FMA); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | |||
138 | AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST) | ||
139 | : AMDGPUEvergreenDevice(ST) { | ||
140 | setCaps(); | ||
141 | } | ||
142 | |||
143 | AMDGPUCedarDevice::~AMDGPUCedarDevice() { | ||
144 | } | ||
145 | |||
146 | void AMDGPUCedarDevice::setCaps() { | ||
147 | mSWBits.set(AMDGPUDeviceInfo::FMA); | ||
148 | } | ||
149 | |||
150 | size_t AMDGPUCedarDevice::getWavefrontSize() const { | ||
151 | return AMDGPUDevice::QuarterWavefrontSize; | ||
152 | } | ||
153 | |||
154 | AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST) | ||
155 | : AMDGPUEvergreenDevice(ST) { | ||
156 | setCaps(); | ||
157 | } | ||
158 | |||
159 | AMDGPURedwoodDevice::~AMDGPURedwoodDevice() | ||
160 | { | ||
161 | } | ||
162 | |||
163 | void AMDGPURedwoodDevice::setCaps() { | ||
164 | mSWBits.set(AMDGPUDeviceInfo::FMA); | ||
165 | } | ||
166 | |||
167 | size_t AMDGPURedwoodDevice::getWavefrontSize() const { | ||
168 | return AMDGPUDevice::HalfWavefrontSize; | ||
169 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h deleted file mode 100644 index cde1472f934..00000000000 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h +++ /dev/null | |||
@@ -1,87 +0,0 @@ | |||
1 | //==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the subtarget data classes. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | // This file will define the interface that each generation needs to | ||
14 | // implement in order to correctly answer queries on the capabilities of the | ||
15 | // specific hardware. | ||
16 | //===----------------------------------------------------------------------===// | ||
17 | #ifndef _AMDILEVERGREENDEVICE_H_ | ||
18 | #define _AMDILEVERGREENDEVICE_H_ | ||
19 | #include "AMDILDevice.h" | ||
20 | #include "AMDGPUSubtarget.h" | ||
21 | |||
22 | namespace llvm { | ||
23 | class AMDGPUSubtarget; | ||
24 | //===----------------------------------------------------------------------===// | ||
25 | // Evergreen generation of devices and their respective sub classes | ||
26 | //===----------------------------------------------------------------------===// | ||
27 | |||
28 | |||
29 | // The AMDGPUEvergreenDevice is the base device class for all of the Evergreen | ||
30 | // series of cards. This class contains information required to differentiate | ||
31 | // the Evergreen device from the generic AMDGPUDevice. This device represents | ||
32 | // that capabilities of the 'Juniper' cards, also known as the HD57XX. | ||
33 | class AMDGPUEvergreenDevice : public AMDGPUDevice { | ||
34 | public: | ||
35 | AMDGPUEvergreenDevice(AMDGPUSubtarget *ST); | ||
36 | virtual ~AMDGPUEvergreenDevice(); | ||
37 | virtual size_t getMaxLDSSize() const; | ||
38 | virtual size_t getMaxGDSSize() const; | ||
39 | virtual size_t getWavefrontSize() const; | ||
40 | virtual uint32_t getGeneration() const; | ||
41 | virtual uint32_t getMaxNumUAVs() const; | ||
42 | virtual uint32_t getResourceID(uint32_t) const; | ||
43 | protected: | ||
44 | virtual void setCaps(); | ||
45 | }; // AMDGPUEvergreenDevice | ||
46 | |||
47 | // The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has | ||
48 | // support for double precision operations. This device is used to represent | ||
49 | // both the Cypress and Hemlock cards, which are commercially known as HD58XX | ||
50 | // and HD59XX cards. | ||
51 | class AMDGPUCypressDevice : public AMDGPUEvergreenDevice { | ||
52 | public: | ||
53 | AMDGPUCypressDevice(AMDGPUSubtarget *ST); | ||
54 | virtual ~AMDGPUCypressDevice(); | ||
55 | private: | ||
56 | virtual void setCaps(); | ||
57 | }; // AMDGPUCypressDevice | ||
58 | |||
59 | |||
60 | // The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based | ||
61 | // devices. This class differs from the base AMDGPUEvergreenDevice in that the | ||
62 | // device is a ~quarter of the 'Juniper'. These are commercially known as the | ||
63 | // HD54XX and HD53XX series of cards. | ||
64 | class AMDGPUCedarDevice : public AMDGPUEvergreenDevice { | ||
65 | public: | ||
66 | AMDGPUCedarDevice(AMDGPUSubtarget *ST); | ||
67 | virtual ~AMDGPUCedarDevice(); | ||
68 | virtual size_t getWavefrontSize() const; | ||
69 | private: | ||
70 | virtual void setCaps(); | ||
71 | }; // AMDGPUCedarDevice | ||
72 | |||
73 | // The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based | ||
74 | // devices. This class differs from the base class, in that these devices are | ||
75 | // considered about half of a 'Juniper' device. These are commercially known as | ||
76 | // the HD55XX and HD56XX series of cards. | ||
77 | class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice { | ||
78 | public: | ||
79 | AMDGPURedwoodDevice(AMDGPUSubtarget *ST); | ||
80 | virtual ~AMDGPURedwoodDevice(); | ||
81 | virtual size_t getWavefrontSize() const; | ||
82 | private: | ||
83 | virtual void setCaps(); | ||
84 | }; // AMDGPURedwoodDevice | ||
85 | |||
86 | } // namespace llvm | ||
87 | #endif // _AMDGPUEVERGREENDEVICE_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp deleted file mode 100644 index f2a0fe5a39c..00000000000 --- a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | //===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface to describe a layout of a stack frame on a AMDIL target machine | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | #include "AMDILFrameLowering.h" | ||
14 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||
15 | |||
16 | using namespace llvm; | ||
17 | AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, | ||
18 | int LAO, unsigned TransAl) | ||
19 | : TargetFrameLowering(D, StackAl, LAO, TransAl) | ||
20 | { | ||
21 | } | ||
22 | |||
23 | AMDGPUFrameLowering::~AMDGPUFrameLowering() | ||
24 | { | ||
25 | } | ||
26 | |||
27 | /// getFrameIndexOffset - Returns the displacement from the frame register to | ||
28 | /// the stack frame of the specified index. | ||
29 | int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, | ||
30 | int FI) const { | ||
31 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | ||
32 | return MFI->getObjectOffset(FI); | ||
33 | } | ||
34 | |||
35 | const TargetFrameLowering::SpillSlot * | ||
36 | AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const | ||
37 | { | ||
38 | NumEntries = 0; | ||
39 | return 0; | ||
40 | } | ||
41 | void | ||
42 | AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const | ||
43 | { | ||
44 | } | ||
45 | void | ||
46 | AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const | ||
47 | { | ||
48 | } | ||
49 | bool | ||
50 | AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const | ||
51 | { | ||
52 | return false; | ||
53 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.h b/src/gallium/drivers/radeon/AMDILFrameLowering.h deleted file mode 100644 index 934ee46821d..00000000000 --- a/src/gallium/drivers/radeon/AMDILFrameLowering.h +++ /dev/null | |||
@@ -1,46 +0,0 @@ | |||
1 | //===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The LLVM Compiler Infrastructure | ||
11 | // | ||
12 | // This file is distributed under the University of Illinois Open Source | ||
13 | // License. See LICENSE.TXT for details. | ||
14 | // | ||
15 | //===----------------------------------------------------------------------===// | ||
16 | // | ||
17 | // Interface to describe a layout of a stack frame on a AMDIL target machine | ||
18 | // | ||
19 | //===----------------------------------------------------------------------===// | ||
20 | #ifndef _AMDILFRAME_LOWERING_H_ | ||
21 | #define _AMDILFRAME_LOWERING_H_ | ||
22 | |||
23 | #include "llvm/CodeGen/MachineFunction.h" | ||
24 | #include "llvm/Target/TargetFrameLowering.h" | ||
25 | |||
26 | /// Information about the stack frame layout on the AMDGPU targets. It holds | ||
27 | /// the direction of the stack growth, the known stack alignment on entry to | ||
28 | /// each function, and the offset to the locals area. | ||
29 | /// See TargetFrameInfo for more comments. | ||
30 | |||
31 | namespace llvm { | ||
32 | class AMDGPUFrameLowering : public TargetFrameLowering { | ||
33 | public: | ||
34 | AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned | ||
35 | TransAl = 1); | ||
36 | virtual ~AMDGPUFrameLowering(); | ||
37 | virtual int getFrameIndexOffset(const MachineFunction &MF, | ||
38 | int FI) const; | ||
39 | virtual const SpillSlot * | ||
40 | getCalleeSavedSpillSlots(unsigned &NumEntries) const; | ||
41 | virtual void emitPrologue(MachineFunction &MF) const; | ||
42 | virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; | ||
43 | virtual bool hasFP(const MachineFunction &MF) const; | ||
44 | }; // class AMDGPUFrameLowering | ||
45 | } // namespace llvm | ||
46 | #endif // _AMDILFRAME_LOWERING_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp deleted file mode 100644 index 807113134d2..00000000000 --- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp +++ /dev/null | |||
@@ -1,395 +0,0 @@ | |||
1 | //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file defines an instruction selector for the AMDIL target. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | #include "AMDGPUInstrInfo.h" | ||
14 | #include "AMDGPUISelLowering.h" // For AMDGPUISD | ||
15 | #include "AMDGPURegisterInfo.h" | ||
16 | #include "AMDILDevices.h" | ||
17 | #include "AMDILUtilityFunctions.h" | ||
18 | #include "llvm/ADT/ValueMap.h" | ||
19 | #include "llvm/CodeGen/PseudoSourceValue.h" | ||
20 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||
21 | #include "llvm/Support/Compiler.h" | ||
22 | #include <list> | ||
23 | #include <queue> | ||
24 | |||
25 | using namespace llvm; | ||
26 | |||
27 | //===----------------------------------------------------------------------===// | ||
28 | // Instruction Selector Implementation | ||
29 | //===----------------------------------------------------------------------===// | ||
30 | |||
31 | //===----------------------------------------------------------------------===// | ||
32 | // AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions | ||
33 | // //for SelectionDAG operations. | ||
34 | // | ||
35 | namespace { | ||
36 | class AMDGPUDAGToDAGISel : public SelectionDAGISel { | ||
37 | // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can | ||
38 | // make the right decision when generating code for different targets. | ||
39 | const AMDGPUSubtarget &Subtarget; | ||
40 | public: | ||
41 | AMDGPUDAGToDAGISel(TargetMachine &TM); | ||
42 | virtual ~AMDGPUDAGToDAGISel(); | ||
43 | |||
44 | SDNode *Select(SDNode *N); | ||
45 | virtual const char *getPassName() const; | ||
46 | |||
47 | private: | ||
48 | inline SDValue getSmallIPtrImm(unsigned Imm); | ||
49 | |||
50 | // Complex pattern selectors | ||
51 | bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); | ||
52 | bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); | ||
53 | bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); | ||
54 | |||
55 | static bool checkType(const Value *ptr, unsigned int addrspace); | ||
56 | static const Value *getBasePointerValue(const Value *V); | ||
57 | |||
58 | static bool isGlobalStore(const StoreSDNode *N); | ||
59 | static bool isPrivateStore(const StoreSDNode *N); | ||
60 | static bool isLocalStore(const StoreSDNode *N); | ||
61 | static bool isRegionStore(const StoreSDNode *N); | ||
62 | |||
63 | static bool isCPLoad(const LoadSDNode *N); | ||
64 | static bool isConstantLoad(const LoadSDNode *N, int cbID); | ||
65 | static bool isGlobalLoad(const LoadSDNode *N); | ||
66 | static bool isPrivateLoad(const LoadSDNode *N); | ||
67 | static bool isLocalLoad(const LoadSDNode *N); | ||
68 | static bool isRegionLoad(const LoadSDNode *N); | ||
69 | |||
70 | bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); | ||
71 | bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); | ||
72 | bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); | ||
73 | |||
74 | // Include the pieces autogenerated from the target description. | ||
75 | #include "AMDGPUGenDAGISel.inc" | ||
76 | }; | ||
77 | } // end anonymous namespace | ||
78 | |||
79 | // createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific | ||
80 | // DAG, ready for instruction scheduling. | ||
81 | // | ||
82 | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM | ||
83 | ) { | ||
84 | return new AMDGPUDAGToDAGISel(TM); | ||
85 | } | ||
86 | |||
87 | AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM | ||
88 | ) | ||
89 | : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) | ||
90 | { | ||
91 | } | ||
92 | |||
93 | AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { | ||
94 | } | ||
95 | |||
96 | SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { | ||
97 | return CurDAG->getTargetConstant(Imm, MVT::i32); | ||
98 | } | ||
99 | |||
100 | bool AMDGPUDAGToDAGISel::SelectADDRParam( | ||
101 | SDValue Addr, SDValue& R1, SDValue& R2) { | ||
102 | |||
103 | if (Addr.getOpcode() == ISD::FrameIndex) { | ||
104 | if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { | ||
105 | R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); | ||
106 | R2 = CurDAG->getTargetConstant(0, MVT::i32); | ||
107 | } else { | ||
108 | R1 = Addr; | ||
109 | R2 = CurDAG->getTargetConstant(0, MVT::i32); | ||
110 | } | ||
111 | } else if (Addr.getOpcode() == ISD::ADD) { | ||
112 | R1 = Addr.getOperand(0); | ||
113 | R2 = Addr.getOperand(1); | ||
114 | } else { | ||
115 | R1 = Addr; | ||
116 | R2 = CurDAG->getTargetConstant(0, MVT::i32); | ||
117 | } | ||
118 | return true; | ||
119 | } | ||
120 | |||
121 | bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { | ||
122 | if (Addr.getOpcode() == ISD::TargetExternalSymbol || | ||
123 | Addr.getOpcode() == ISD::TargetGlobalAddress) { | ||
124 | return false; | ||
125 | } | ||
126 | return SelectADDRParam(Addr, R1, R2); | ||
127 | } | ||
128 | |||
129 | |||
130 | bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { | ||
131 | if (Addr.getOpcode() == ISD::TargetExternalSymbol || | ||
132 | Addr.getOpcode() == ISD::TargetGlobalAddress) { | ||
133 | return false; | ||
134 | } | ||
135 | |||
136 | if (Addr.getOpcode() == ISD::FrameIndex) { | ||
137 | if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { | ||
138 | R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); | ||
139 | R2 = CurDAG->getTargetConstant(0, MVT::i64); | ||
140 | } else { | ||
141 | R1 = Addr; | ||
142 | R2 = CurDAG->getTargetConstant(0, MVT::i64); | ||
143 | } | ||
144 | } else if (Addr.getOpcode() == ISD::ADD) { | ||
145 | R1 = Addr.getOperand(0); | ||
146 | R2 = Addr.getOperand(1); | ||
147 | } else { | ||
148 | R1 = Addr; | ||
149 | R2 = CurDAG->getTargetConstant(0, MVT::i64); | ||
150 | } | ||
151 | return true; | ||
152 | } | ||
153 | |||
154 | SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { | ||
155 | unsigned int Opc = N->getOpcode(); | ||
156 | if (N->isMachineOpcode()) { | ||
157 | return NULL; // Already selected. | ||
158 | } | ||
159 | switch (Opc) { | ||
160 | default: break; | ||
161 | case ISD::FrameIndex: | ||
162 | { | ||
163 | if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { | ||
164 | unsigned int FI = FIN->getIndex(); | ||
165 | EVT OpVT = N->getValueType(0); | ||
166 | unsigned int NewOpc = AMDGPU::COPY; | ||
167 | SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); | ||
168 | return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); | ||
169 | } | ||
170 | } | ||
171 | break; | ||
172 | } | ||
173 | return SelectCode(N); | ||
174 | } | ||
175 | |||
176 | bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { | ||
177 | if (!ptr) { | ||
178 | return false; | ||
179 | } | ||
180 | Type *ptrType = ptr->getType(); | ||
181 | return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; | ||
182 | } | ||
183 | |||
184 | const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) | ||
185 | { | ||
186 | if (!V) { | ||
187 | return NULL; | ||
188 | } | ||
189 | const Value *ret = NULL; | ||
190 | ValueMap<const Value *, bool> ValueBitMap; | ||
191 | std::queue<const Value *, std::list<const Value *> > ValueQueue; | ||
192 | ValueQueue.push(V); | ||
193 | while (!ValueQueue.empty()) { | ||
194 | V = ValueQueue.front(); | ||
195 | if (ValueBitMap.find(V) == ValueBitMap.end()) { | ||
196 | ValueBitMap[V] = true; | ||
197 | if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { | ||
198 | ret = V; | ||
199 | break; | ||
200 | } else if (dyn_cast<GlobalVariable>(V)) { | ||
201 | ret = V; | ||
202 | break; | ||
203 | } else if (dyn_cast<Constant>(V)) { | ||
204 | const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); | ||
205 | if (CE) { | ||
206 | ValueQueue.push(CE->getOperand(0)); | ||
207 | } | ||
208 | } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { | ||
209 | ret = AI; | ||
210 | break; | ||
211 | } else if (const Instruction *I = dyn_cast<Instruction>(V)) { | ||
212 | uint32_t numOps = I->getNumOperands(); | ||
213 | for (uint32_t x = 0; x < numOps; ++x) { | ||
214 | ValueQueue.push(I->getOperand(x)); | ||
215 | } | ||
216 | } else { | ||
217 | // assert(0 && "Found a Value that we didn't know how to handle!"); | ||
218 | } | ||
219 | } | ||
220 | ValueQueue.pop(); | ||
221 | } | ||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { | ||
226 | return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); | ||
227 | } | ||
228 | |||
229 | bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { | ||
230 | return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) | ||
231 | && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) | ||
232 | && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)); | ||
233 | } | ||
234 | |||
235 | bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { | ||
236 | return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); | ||
237 | } | ||
238 | |||
239 | bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { | ||
240 | return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); | ||
241 | } | ||
242 | |||
243 | bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { | ||
244 | if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) { | ||
245 | return true; | ||
246 | } | ||
247 | MachineMemOperand *MMO = N->getMemOperand(); | ||
248 | const Value *V = MMO->getValue(); | ||
249 | const Value *BV = getBasePointerValue(V); | ||
250 | if (MMO | ||
251 | && MMO->getValue() | ||
252 | && ((V && dyn_cast<GlobalValue>(V)) | ||
253 | || (BV && dyn_cast<GlobalValue>( | ||
254 | getBasePointerValue(MMO->getValue()))))) { | ||
255 | return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS); | ||
256 | } else { | ||
257 | return false; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { | ||
262 | return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); | ||
263 | } | ||
264 | |||
265 | bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { | ||
266 | return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); | ||
267 | } | ||
268 | |||
269 | bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { | ||
270 | return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); | ||
271 | } | ||
272 | |||
273 | bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) { | ||
274 | MachineMemOperand *MMO = N->getMemOperand(); | ||
275 | if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { | ||
276 | if (MMO) { | ||
277 | const Value *V = MMO->getValue(); | ||
278 | const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); | ||
279 | if (PSV && PSV == PseudoSourceValue::getConstantPool()) { | ||
280 | return true; | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | return false; | ||
285 | } | ||
286 | |||
287 | bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { | ||
288 | if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { | ||
289 | // Check to make sure we are not a constant pool load or a constant load | ||
290 | // that is marked as a private load | ||
291 | if (isCPLoad(N) || isConstantLoad(N, -1)) { | ||
292 | return false; | ||
293 | } | ||
294 | } | ||
295 | if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) | ||
296 | && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) | ||
297 | && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS) | ||
298 | && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS) | ||
299 | && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS) | ||
300 | && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) | ||
301 | { | ||
302 | return true; | ||
303 | } | ||
304 | return false; | ||
305 | } | ||
306 | |||
307 | const char *AMDGPUDAGToDAGISel::getPassName() const { | ||
308 | return "AMDGPU DAG->DAG Pattern Instruction Selection"; | ||
309 | } | ||
310 | |||
311 | #ifdef DEBUGTMP | ||
312 | #undef INT64_C | ||
313 | #endif | ||
314 | #undef DEBUGTMP | ||
315 | |||
316 | ///==== AMDGPU Functions ====/// | ||
317 | |||
318 | bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, | ||
319 | SDValue& Offset) { | ||
320 | if (Addr.getOpcode() == ISD::TargetExternalSymbol || | ||
321 | Addr.getOpcode() == ISD::TargetGlobalAddress) { | ||
322 | return false; | ||
323 | } | ||
324 | |||
325 | |||
326 | if (Addr.getOpcode() == ISD::ADD) { | ||
327 | bool Match = false; | ||
328 | |||
329 | // Find the base ptr and the offset | ||
330 | for (unsigned i = 0; i < Addr.getNumOperands(); i++) { | ||
331 | SDValue Arg = Addr.getOperand(i); | ||
332 | ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg); | ||
333 | // This arg isn't a constant so it must be the base PTR. | ||
334 | if (!OffsetNode) { | ||
335 | Base = Addr.getOperand(i); | ||
336 | continue; | ||
337 | } | ||
338 | // Check if the constant argument fits in 8-bits. The offset is in bytes | ||
339 | // so we need to convert it to dwords. | ||
340 | if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) { | ||
341 | Match = true; | ||
342 | Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, | ||
343 | MVT::i32); | ||
344 | } | ||
345 | } | ||
346 | return Match; | ||
347 | } | ||
348 | |||
349 | // Default case, no offset | ||
350 | Base = Addr; | ||
351 | Offset = CurDAG->getTargetConstant(0, MVT::i32); | ||
352 | return true; | ||
353 | } | ||
354 | |||
355 | bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, | ||
356 | SDValue &Offset) | ||
357 | { | ||
358 | ConstantSDNode * IMMOffset; | ||
359 | |||
360 | if (Addr.getOpcode() == ISD::ADD | ||
361 | && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) | ||
362 | && isInt<16>(IMMOffset->getZExtValue())) { | ||
363 | |||
364 | Base = Addr.getOperand(0); | ||
365 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); | ||
366 | return true; | ||
367 | // If the pointer address is constant, we can move it to the offset field. | ||
368 | } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) | ||
369 | && isInt<16>(IMMOffset->getZExtValue())) { | ||
370 | Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), | ||
371 | CurDAG->getEntryNode().getDebugLoc(), | ||
372 | AMDGPU::ZERO, MVT::i32); | ||
373 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); | ||
374 | return true; | ||
375 | } | ||
376 | |||
377 | // Default case, no offset | ||
378 | Base = Addr; | ||
379 | Offset = CurDAG->getTargetConstant(0, MVT::i32); | ||
380 | return true; | ||
381 | } | ||
382 | |||
383 | bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, | ||
384 | SDValue& Offset) { | ||
385 | if (Addr.getOpcode() == ISD::TargetExternalSymbol || | ||
386 | Addr.getOpcode() == ISD::TargetGlobalAddress || | ||
387 | Addr.getOpcode() != ISD::ADD) { | ||
388 | return false; | ||
389 | } | ||
390 | |||
391 | Base = Addr.getOperand(0); | ||
392 | Offset = Addr.getOperand(1); | ||
393 | |||
394 | return true; | ||
395 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp deleted file mode 100644 index 993025c3d47..00000000000 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ /dev/null | |||
@@ -1,677 +0,0 @@ | |||
1 | //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains TargetLowering functions borrowed from AMDIL. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPUISelLowering.h" | ||
15 | #include "AMDGPURegisterInfo.h" | ||
16 | #include "AMDILDevices.h" | ||
17 | #include "AMDILIntrinsicInfo.h" | ||
18 | #include "AMDGPUSubtarget.h" | ||
19 | #include "AMDILUtilityFunctions.h" | ||
20 | #include "llvm/CallingConv.h" | ||
21 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||
22 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
23 | #include "llvm/CodeGen/PseudoSourceValue.h" | ||
24 | #include "llvm/CodeGen/SelectionDAG.h" | ||
25 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||
26 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | ||
27 | #include "llvm/DerivedTypes.h" | ||
28 | #include "llvm/Instructions.h" | ||
29 | #include "llvm/Intrinsics.h" | ||
30 | #include "llvm/Support/raw_ostream.h" | ||
31 | #include "llvm/Target/TargetInstrInfo.h" | ||
32 | #include "llvm/Target/TargetOptions.h" | ||
33 | |||
34 | using namespace llvm; | ||
35 | //===----------------------------------------------------------------------===// | ||
36 | // Calling Convention Implementation | ||
37 | //===----------------------------------------------------------------------===// | ||
38 | #include "AMDGPUGenCallingConv.inc" | ||
39 | |||
40 | //===----------------------------------------------------------------------===// | ||
41 | // TargetLowering Implementation Help Functions End | ||
42 | //===----------------------------------------------------------------------===// | ||
43 | |||
44 | //===----------------------------------------------------------------------===// | ||
45 | // TargetLowering Class Implementation Begins | ||
46 | //===----------------------------------------------------------------------===// | ||
// Legalization setup carried over from the AMDIL backend: for every value
// type the hardware register classes can hold, record whether each DAG
// operation is Legal, needs Custom lowering, or must be Expanded.  Also
// sets backend-wide scheduling/codegen preferences.
void AMDGPUTargetLowering::InitAMDILLowering()
{
  // Every type that can live in a register: scalars and vectors, int and
  // float.  Stored as int so one array can mix SimpleValueType values.
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types, integer and floating point.
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions common to every supported type.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit SDIV has no custom lowering yet (LowerSDIV64 is a stub), so
    // only the narrower types take the Custom path.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    // NOTE(review): only ROTR and BSWAP are expanded here; ROTL is
    // presumably handled elsewhere or legalized by default -- confirm.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    // Vector shuffles/selects and wide multiplies are scalarized.
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // Extra 64-bit integer actions, only when the device has 64-bit support.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

  // NOTE(review): numTypes etc. are local variables, not macros, so these
  // #undefs are no-ops; kept unchanged to preserve the original text.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}
239 | |||
240 | bool | ||
241 | AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, | ||
242 | const CallInst &I, unsigned Intrinsic) const | ||
243 | { | ||
244 | return false; | ||
245 | } | ||
246 | // The backend supports 32 and 64 bit floating point immediates | ||
247 | bool | ||
248 | AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const | ||
249 | { | ||
250 | if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 | ||
251 | || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { | ||
252 | return true; | ||
253 | } else { | ||
254 | return false; | ||
255 | } | ||
256 | } | ||
257 | |||
258 | bool | ||
259 | AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const | ||
260 | { | ||
261 | if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 | ||
262 | || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { | ||
263 | return false; | ||
264 | } else { | ||
265 | return true; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | |||
270 | // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to | ||
271 | // be zero. Op is expected to be a target specific node. Used by DAG | ||
272 | // combiner. | ||
273 | |||
274 | void | ||
275 | AMDGPUTargetLowering::computeMaskedBitsForTargetNode( | ||
276 | const SDValue Op, | ||
277 | APInt &KnownZero, | ||
278 | APInt &KnownOne, | ||
279 | const SelectionDAG &DAG, | ||
280 | unsigned Depth) const | ||
281 | { | ||
282 | APInt KnownZero2; | ||
283 | APInt KnownOne2; | ||
284 | KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything | ||
285 | switch (Op.getOpcode()) { | ||
286 | default: break; | ||
287 | case ISD::SELECT_CC: | ||
288 | DAG.ComputeMaskedBits( | ||
289 | Op.getOperand(1), | ||
290 | KnownZero, | ||
291 | KnownOne, | ||
292 | Depth + 1 | ||
293 | ); | ||
294 | DAG.ComputeMaskedBits( | ||
295 | Op.getOperand(0), | ||
296 | KnownZero2, | ||
297 | KnownOne2 | ||
298 | ); | ||
299 | assert((KnownZero & KnownOne) == 0 | ||
300 | && "Bits known to be one AND zero?"); | ||
301 | assert((KnownZero2 & KnownOne2) == 0 | ||
302 | && "Bits known to be one AND zero?"); | ||
303 | // Only known if known in both the LHS and RHS | ||
304 | KnownOne &= KnownOne2; | ||
305 | KnownZero &= KnownZero2; | ||
306 | break; | ||
307 | }; | ||
308 | } | ||
309 | |||
310 | //===----------------------------------------------------------------------===// | ||
311 | // Other Lowering Hooks | ||
312 | //===----------------------------------------------------------------------===// | ||
313 | |||
314 | SDValue | ||
315 | AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const | ||
316 | { | ||
317 | EVT OVT = Op.getValueType(); | ||
318 | SDValue DST; | ||
319 | if (OVT.getScalarType() == MVT::i64) { | ||
320 | DST = LowerSDIV64(Op, DAG); | ||
321 | } else if (OVT.getScalarType() == MVT::i32) { | ||
322 | DST = LowerSDIV32(Op, DAG); | ||
323 | } else if (OVT.getScalarType() == MVT::i16 | ||
324 | || OVT.getScalarType() == MVT::i8) { | ||
325 | DST = LowerSDIV24(Op, DAG); | ||
326 | } else { | ||
327 | DST = SDValue(Op.getNode(), 0); | ||
328 | } | ||
329 | return DST; | ||
330 | } | ||
331 | |||
332 | SDValue | ||
333 | AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const | ||
334 | { | ||
335 | EVT OVT = Op.getValueType(); | ||
336 | SDValue DST; | ||
337 | if (OVT.getScalarType() == MVT::i64) { | ||
338 | DST = LowerSREM64(Op, DAG); | ||
339 | } else if (OVT.getScalarType() == MVT::i32) { | ||
340 | DST = LowerSREM32(Op, DAG); | ||
341 | } else if (OVT.getScalarType() == MVT::i16) { | ||
342 | DST = LowerSREM16(Op, DAG); | ||
343 | } else if (OVT.getScalarType() == MVT::i8) { | ||
344 | DST = LowerSREM8(Op, DAG); | ||
345 | } else { | ||
346 | DST = SDValue(Op.getNode(), 0); | ||
347 | } | ||
348 | return DST; | ||
349 | } | ||
350 | |||
// Lower sign_extend_inreg as a shift-left / arithmetic-shift-right pair.
// Data narrower than 32 bits is first zero-extended to a 32-bit type so the
// shifts operate on a legal width, then converted back at the end.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();  // type the value currently has
  EVT BVT = BaseType->getVT();    // type being sign-extended *from*
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits: the pair replicates the sign bit
  // of the baseBits-wide value across the upper bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
382 | EVT | ||
383 | AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const | ||
384 | { | ||
385 | int iSize = (size * numEle); | ||
386 | int vEle = (iSize >> ((size == 64) ? 6 : 5)); | ||
387 | if (!vEle) { | ||
388 | vEle = 1; | ||
389 | } | ||
390 | if (size == 64) { | ||
391 | if (vEle == 1) { | ||
392 | return EVT(MVT::i64); | ||
393 | } else { | ||
394 | return EVT(MVT::getVectorVT(MVT::i64, vEle)); | ||
395 | } | ||
396 | } else { | ||
397 | if (vEle == 1) { | ||
398 | return EVT(MVT::i32); | ||
399 | } else { | ||
400 | return EVT(MVT::getVectorVT(MVT::i32, vEle)); | ||
401 | } | ||
402 | } | ||
403 | } | ||
404 | |||
405 | SDValue | ||
406 | AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const | ||
407 | { | ||
408 | SDValue Chain = Op.getOperand(0); | ||
409 | SDValue Cond = Op.getOperand(1); | ||
410 | SDValue Jump = Op.getOperand(2); | ||
411 | SDValue Result; | ||
412 | Result = DAG.getNode( | ||
413 | AMDGPUISD::BRANCH_COND, | ||
414 | Op.getDebugLoc(), | ||
415 | Op.getValueType(), | ||
416 | Chain, Jump, Cond); | ||
417 | return Result; | ||
418 | } | ||
419 | |||
420 | SDValue | ||
421 | AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const | ||
422 | { | ||
423 | DebugLoc DL = Op.getDebugLoc(); | ||
424 | EVT OVT = Op.getValueType(); | ||
425 | SDValue LHS = Op.getOperand(0); | ||
426 | SDValue RHS = Op.getOperand(1); | ||
427 | MVT INTTY; | ||
428 | MVT FLTTY; | ||
429 | if (!OVT.isVector()) { | ||
430 | INTTY = MVT::i32; | ||
431 | FLTTY = MVT::f32; | ||
432 | } else if (OVT.getVectorNumElements() == 2) { | ||
433 | INTTY = MVT::v2i32; | ||
434 | FLTTY = MVT::v2f32; | ||
435 | } else if (OVT.getVectorNumElements() == 4) { | ||
436 | INTTY = MVT::v4i32; | ||
437 | FLTTY = MVT::v4f32; | ||
438 | } | ||
439 | unsigned bitsize = OVT.getScalarType().getSizeInBits(); | ||
440 | // char|short jq = ia ^ ib; | ||
441 | SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); | ||
442 | |||
443 | // jq = jq >> (bitsize - 2) | ||
444 | jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); | ||
445 | |||
446 | // jq = jq | 0x1 | ||
447 | jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); | ||
448 | |||
449 | // jq = (int)jq | ||
450 | jq = DAG.getSExtOrTrunc(jq, DL, INTTY); | ||
451 | |||
452 | // int ia = (int)LHS; | ||
453 | SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); | ||
454 | |||
455 | // int ib, (int)RHS; | ||
456 | SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); | ||
457 | |||
458 | // float fa = (float)ia; | ||
459 | SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); | ||
460 | |||
461 | // float fb = (float)ib; | ||
462 | SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); | ||
463 | |||
464 | // float fq = native_divide(fa, fb); | ||
465 | SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); | ||
466 | |||
467 | // fq = trunc(fq); | ||
468 | fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); | ||
469 | |||
470 | // float fqneg = -fq; | ||
471 | SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); | ||
472 | |||
473 | // float fr = mad(fqneg, fb, fa); | ||
474 | SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); | ||
475 | |||
476 | // int iq = (int)fq; | ||
477 | SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); | ||
478 | |||
479 | // fr = fabs(fr); | ||
480 | fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); | ||
481 | |||
482 | // fb = fabs(fb); | ||
483 | fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); | ||
484 | |||
485 | // int cv = fr >= fb; | ||
486 | SDValue cv; | ||
487 | if (INTTY == MVT::i32) { | ||
488 | cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); | ||
489 | } else { | ||
490 | cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); | ||
491 | } | ||
492 | // jq = (cv ? jq : 0); | ||
493 | jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, | ||
494 | DAG.getConstant(0, OVT)); | ||
495 | // dst = iq + jq; | ||
496 | iq = DAG.getSExtOrTrunc(iq, DL, OVT); | ||
497 | iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); | ||
498 | return iq; | ||
499 | } | ||
500 | |||
501 | SDValue | ||
502 | AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const | ||
503 | { | ||
504 | DebugLoc DL = Op.getDebugLoc(); | ||
505 | EVT OVT = Op.getValueType(); | ||
506 | SDValue LHS = Op.getOperand(0); | ||
507 | SDValue RHS = Op.getOperand(1); | ||
508 | // The LowerSDIV32 function generates equivalent to the following IL. | ||
509 | // mov r0, LHS | ||
510 | // mov r1, RHS | ||
511 | // ilt r10, r0, 0 | ||
512 | // ilt r11, r1, 0 | ||
513 | // iadd r0, r0, r10 | ||
514 | // iadd r1, r1, r11 | ||
515 | // ixor r0, r0, r10 | ||
516 | // ixor r1, r1, r11 | ||
517 | // udiv r0, r0, r1 | ||
518 | // ixor r10, r10, r11 | ||
519 | // iadd r0, r0, r10 | ||
520 | // ixor DST, r0, r10 | ||
521 | |||
522 | // mov r0, LHS | ||
523 | SDValue r0 = LHS; | ||
524 | |||
525 | // mov r1, RHS | ||
526 | SDValue r1 = RHS; | ||
527 | |||
528 | // ilt r10, r0, 0 | ||
529 | SDValue r10 = DAG.getSelectCC(DL, | ||
530 | r0, DAG.getConstant(0, OVT), | ||
531 | DAG.getConstant(-1, MVT::i32), | ||
532 | DAG.getConstant(0, MVT::i32), | ||
533 | ISD::SETLT); | ||
534 | |||
535 | // ilt r11, r1, 0 | ||
536 | SDValue r11 = DAG.getSelectCC(DL, | ||
537 | r1, DAG.getConstant(0, OVT), | ||
538 | DAG.getConstant(-1, MVT::i32), | ||
539 | DAG.getConstant(0, MVT::i32), | ||
540 | ISD::SETLT); | ||
541 | |||
542 | // iadd r0, r0, r10 | ||
543 | r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); | ||
544 | |||
545 | // iadd r1, r1, r11 | ||
546 | r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); | ||
547 | |||
548 | // ixor r0, r0, r10 | ||
549 | r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); | ||
550 | |||
551 | // ixor r1, r1, r11 | ||
552 | r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); | ||
553 | |||
554 | // udiv r0, r0, r1 | ||
555 | r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); | ||
556 | |||
557 | // ixor r10, r10, r11 | ||
558 | r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); | ||
559 | |||
560 | // iadd r0, r0, r10 | ||
561 | r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); | ||
562 | |||
563 | // ixor DST, r0, r10 | ||
564 | SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); | ||
565 | return DST; | ||
566 | } | ||
567 | |||
568 | SDValue | ||
569 | AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const | ||
570 | { | ||
571 | return SDValue(Op.getNode(), 0); | ||
572 | } | ||
573 | |||
574 | SDValue | ||
575 | AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const | ||
576 | { | ||
577 | DebugLoc DL = Op.getDebugLoc(); | ||
578 | EVT OVT = Op.getValueType(); | ||
579 | MVT INTTY = MVT::i32; | ||
580 | if (OVT == MVT::v2i8) { | ||
581 | INTTY = MVT::v2i32; | ||
582 | } else if (OVT == MVT::v4i8) { | ||
583 | INTTY = MVT::v4i32; | ||
584 | } | ||
585 | SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); | ||
586 | SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); | ||
587 | LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); | ||
588 | LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); | ||
589 | return LHS; | ||
590 | } | ||
591 | |||
592 | SDValue | ||
593 | AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const | ||
594 | { | ||
595 | DebugLoc DL = Op.getDebugLoc(); | ||
596 | EVT OVT = Op.getValueType(); | ||
597 | MVT INTTY = MVT::i32; | ||
598 | if (OVT == MVT::v2i16) { | ||
599 | INTTY = MVT::v2i32; | ||
600 | } else if (OVT == MVT::v4i16) { | ||
601 | INTTY = MVT::v4i32; | ||
602 | } | ||
603 | SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); | ||
604 | SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); | ||
605 | LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); | ||
606 | LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); | ||
607 | return LHS; | ||
608 | } | ||
609 | |||
610 | SDValue | ||
611 | AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const | ||
612 | { | ||
613 | DebugLoc DL = Op.getDebugLoc(); | ||
614 | EVT OVT = Op.getValueType(); | ||
615 | SDValue LHS = Op.getOperand(0); | ||
616 | SDValue RHS = Op.getOperand(1); | ||
617 | // The LowerSREM32 function generates equivalent to the following IL. | ||
618 | // mov r0, LHS | ||
619 | // mov r1, RHS | ||
620 | // ilt r10, r0, 0 | ||
621 | // ilt r11, r1, 0 | ||
622 | // iadd r0, r0, r10 | ||
623 | // iadd r1, r1, r11 | ||
624 | // ixor r0, r0, r10 | ||
625 | // ixor r1, r1, r11 | ||
626 | // udiv r20, r0, r1 | ||
627 | // umul r20, r20, r1 | ||
628 | // sub r0, r0, r20 | ||
629 | // iadd r0, r0, r10 | ||
630 | // ixor DST, r0, r10 | ||
631 | |||
632 | // mov r0, LHS | ||
633 | SDValue r0 = LHS; | ||
634 | |||
635 | // mov r1, RHS | ||
636 | SDValue r1 = RHS; | ||
637 | |||
638 | // ilt r10, r0, 0 | ||
639 | SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); | ||
640 | |||
641 | // ilt r11, r1, 0 | ||
642 | SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); | ||
643 | |||
644 | // iadd r0, r0, r10 | ||
645 | r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); | ||
646 | |||
647 | // iadd r1, r1, r11 | ||
648 | r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); | ||
649 | |||
650 | // ixor r0, r0, r10 | ||
651 | r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); | ||
652 | |||
653 | // ixor r1, r1, r11 | ||
654 | r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); | ||
655 | |||
656 | // udiv r20, r0, r1 | ||
657 | SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); | ||
658 | |||
659 | // umul r20, r20, r1 | ||
660 | r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); | ||
661 | |||
662 | // sub r0, r0, r20 | ||
663 | r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); | ||
664 | |||
665 | // iadd r0, r0, r10 | ||
666 | r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); | ||
667 | |||
668 | // ixor DST, r0, r10 | ||
669 | SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); | ||
670 | return DST; | ||
671 | } | ||
672 | |||
673 | SDValue | ||
674 | AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const | ||
675 | { | ||
676 | return SDValue(Op.getNode(), 0); | ||
677 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td deleted file mode 100644 index 050a5bd874f..00000000000 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.td +++ /dev/null | |||
@@ -1,270 +0,0 @@ | |||
1 | //===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file describes the AMDIL instructions in TableGen format. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | // AMDIL Instruction Predicate Definitions | ||
14 | // Predicate that is set to true if the hardware supports double precision | ||
15 | // divide | ||
16 | def HasHWDDiv : Predicate<"Subtarget.device()" | ||
17 | "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && " | ||
18 | "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; | ||
19 | |||
20 | // Predicate that is set to true if the hardware supports double, but not double | ||
21 | // precision divide in hardware | ||
22 | def HasSWDDiv : Predicate<"Subtarget.device()" | ||
23 | "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" | ||
24 | "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; | ||
25 | |||
26 | // Predicate that is set to true if the hardware support 24bit signed | ||
27 | // math ops. Otherwise a software expansion to 32bit math ops is used instead. | ||
28 | def HasHWSign24Bit : Predicate<"Subtarget.device()" | ||
29 | "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">; | ||
30 | |||
31 | // Predicate that is set to true if 64bit operations are supported or not | ||
32 | def HasHW64Bit : Predicate<"Subtarget.device()" | ||
33 | "->usesHardware(AMDGPUDeviceInfo::LongOps)">; | ||
34 | def HasSW64Bit : Predicate<"Subtarget.device()" | ||
35 | "->usesSoftware(AMDGPUDeviceInfo::LongOps)">; | ||
36 | |||
37 | // Predicate that is set to true if the timer register is supported | ||
38 | def HasTmrRegister : Predicate<"Subtarget.device()" | ||
39 | "->isSupported(AMDGPUDeviceInfo::TmrReg)">; | ||
40 | // Predicate that is true if we are at least evergreen series | ||
41 | def HasDeviceIDInst : Predicate<"Subtarget.device()" | ||
42 | "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">; | ||
43 | |||
44 | // Predicate that is true if we have region address space. | ||
45 | def hasRegionAS : Predicate<"Subtarget.device()" | ||
46 | "->usesHardware(AMDGPUDeviceInfo::RegionMem)">; | ||
47 | |||
48 | // Predicate that is false if we don't have region address space. | ||
49 | def noRegionAS : Predicate<"!Subtarget.device()" | ||
50 | "->isSupported(AMDGPUDeviceInfo::RegionMem)">; | ||
51 | |||
52 | |||
53 | // Predicate that is set to true if 64bit Mul is supported in the IL or not | ||
54 | def HasHW64Mul : Predicate<"Subtarget.calVersion()" | ||
55 | ">= CAL_VERSION_SC_139" | ||
56 | "&& Subtarget.device()" | ||
57 | "->getGeneration() >=" | ||
58 | "AMDGPUDeviceInfo::HD5XXX">; | ||
59 | def HasSW64Mul : Predicate<"Subtarget.calVersion()" | ||
60 | "< CAL_VERSION_SC_139">; | ||
61 | // Predicate that is set to true if 64bit Div/Mod is supported in the IL or not | ||
62 | def HasHW64DivMod : Predicate<"Subtarget.device()" | ||
63 | "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">; | ||
64 | def HasSW64DivMod : Predicate<"Subtarget.device()" | ||
65 | "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">; | ||
66 | |||
67 | // Predicate that is set to true if 64bit pointer are used. | ||
68 | def Has64BitPtr : Predicate<"Subtarget.is64bit()">; | ||
69 | def Has32BitPtr : Predicate<"!Subtarget.is64bit()">; | ||
70 | //===--------------------------------------------------------------------===// | ||
71 | // Custom Operands | ||
72 | //===--------------------------------------------------------------------===// | ||
73 | def brtarget : Operand<OtherVT>; | ||
74 | |||
75 | //===--------------------------------------------------------------------===// | ||
76 | // Custom Selection DAG Type Profiles | ||
77 | //===--------------------------------------------------------------------===// | ||
78 | //===----------------------------------------------------------------------===// | ||
79 | // Generic Profile Types | ||
80 | //===----------------------------------------------------------------------===// | ||
81 | |||
82 | def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ | ||
83 | SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> | ||
84 | ]>; | ||
85 | def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ | ||
86 | SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3> | ||
87 | ]>; | ||
88 | def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ | ||
89 | SDTCisEltOfVec<1, 0> | ||
90 | ]>; | ||
91 | |||
92 | //===----------------------------------------------------------------------===// | ||
93 | // Flow Control Profile Types | ||
94 | //===----------------------------------------------------------------------===// | ||
95 | // Branch instruction where second and third are basic blocks | ||
96 | def SDTIL_BRCond : SDTypeProfile<0, 2, [ | ||
97 | SDTCisVT<0, OtherVT> | ||
98 | ]>; | ||
99 | |||
100 | //===--------------------------------------------------------------------===// | ||
101 | // Custom Selection DAG Nodes | ||
102 | //===--------------------------------------------------------------------===// | ||
103 | //===----------------------------------------------------------------------===// | ||
104 | // Flow Control DAG Nodes | ||
105 | //===----------------------------------------------------------------------===// | ||
106 | def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; | ||
107 | |||
108 | //===----------------------------------------------------------------------===// | ||
109 | // Call/Return DAG Nodes | ||
110 | //===----------------------------------------------------------------------===// | ||
111 | def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, | ||
112 | [SDNPHasChain, SDNPOptInGlue]>; | ||
113 | |||
114 | //===--------------------------------------------------------------------===// | ||
115 | // Instructions | ||
116 | //===--------------------------------------------------------------------===// | ||
117 | // Floating point math functions | ||
118 | def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; | ||
119 | def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>; | ||
120 | |||
121 | //===----------------------------------------------------------------------===// | ||
122 | // Integer functions | ||
123 | //===----------------------------------------------------------------------===// | ||
124 | def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp, | ||
125 | [SDNPCommutative, SDNPAssociative]>; | ||
126 | |||
127 | //===--------------------------------------------------------------------===// | ||
128 | // Custom Pattern DAG Nodes | ||
129 | //===--------------------------------------------------------------------===// | ||
130 | def global_store : PatFrag<(ops node:$val, node:$ptr), | ||
131 | (store node:$val, node:$ptr), [{ | ||
132 | return isGlobalStore(dyn_cast<StoreSDNode>(N)); | ||
133 | }]>; | ||
134 | |||
135 | //===----------------------------------------------------------------------===// | ||
136 | // Load pattern fragments | ||
137 | //===----------------------------------------------------------------------===// | ||
138 | // Global address space loads | ||
139 | def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ | ||
140 | return isGlobalLoad(dyn_cast<LoadSDNode>(N)); | ||
141 | }]>; | ||
142 | // Constant address space loads | ||
143 | def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ | ||
144 | return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); | ||
145 | }]>; | ||
146 | |||
147 | //===----------------------------------------------------------------------===// | ||
148 | // Complex addressing mode patterns | ||
149 | //===----------------------------------------------------------------------===// | ||
150 | def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>; | ||
151 | def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>; | ||
152 | def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>; | ||
153 | def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>; | ||
154 | |||
155 | //===----------------------------------------------------------------------===// | ||
156 | // Instruction format classes | ||
157 | //===----------------------------------------------------------------------===// | ||
158 | class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> | ||
159 | : Instruction { | ||
160 | |||
161 | let Namespace = "AMDGPU"; | ||
162 | dag OutOperandList = outs; | ||
163 | dag InOperandList = ins; | ||
164 | let Pattern = pattern; | ||
165 | let AsmString = !strconcat(asmstr, "\n"); | ||
166 | let isPseudo = 1; | ||
167 | let Itinerary = NullALU; | ||
168 | bit hasIEEEFlag = 0; | ||
169 | bit hasZeroOpFlag = 0; | ||
170 | } | ||
171 | |||
172 | //===--------------------------------------------------------------------===// | ||
173 | // Multiclass Instruction formats | ||
174 | //===--------------------------------------------------------------------===// | ||
175 | // Multiclass that handles branch instructions | ||
176 | multiclass BranchConditional<SDNode Op> { | ||
177 | def _i32 : ILFormat<(outs), | ||
178 | (ins brtarget:$target, GPRI32:$src0), | ||
179 | "; i32 Pseudo branch instruction", | ||
180 | [(Op bb:$target, GPRI32:$src0)]>; | ||
181 | def _f32 : ILFormat<(outs), | ||
182 | (ins brtarget:$target, GPRF32:$src0), | ||
183 | "; f32 Pseudo branch instruction", | ||
184 | [(Op bb:$target, GPRF32:$src0)]>; | ||
185 | } | ||
186 | |||
187 | // Only scalar types should generate flow control | ||
188 | multiclass BranchInstr<string name> { | ||
189 | def _i32 : ILFormat<(outs), (ins GPRI32:$src), | ||
190 | !strconcat(name, " $src"), []>; | ||
191 | def _f32 : ILFormat<(outs), (ins GPRF32:$src), | ||
192 | !strconcat(name, " $src"), []>; | ||
193 | } | ||
194 | // Only scalar types should generate flow control | ||
195 | multiclass BranchInstr2<string name> { | ||
196 | def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1), | ||
197 | !strconcat(name, " $src0, $src1"), []>; | ||
198 | def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1), | ||
199 | !strconcat(name, " $src0, $src1"), []>; | ||
200 | } | ||
201 | |||
202 | //===--------------------------------------------------------------------===// | ||
203 | // Intrinsics support | ||
204 | //===--------------------------------------------------------------------===// | ||
205 | include "AMDILIntrinsics.td" | ||
206 | |||
207 | //===--------------------------------------------------------------------===// | ||
208 | // Instructions support | ||
209 | //===--------------------------------------------------------------------===// | ||
210 | //===---------------------------------------------------------------------===// | ||
211 | // Custom Inserter for Branches and returns, this eventually will be a | ||
212 | // seperate pass | ||
213 | //===---------------------------------------------------------------------===// | ||
214 | let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { | ||
215 | def BRANCH : ILFormat<(outs), (ins brtarget:$target), | ||
216 | "; Pseudo unconditional branch instruction", | ||
217 | [(br bb:$target)]>; | ||
218 | defm BRANCH_COND : BranchConditional<IL_brcond>; | ||
219 | } | ||
220 | |||
221 | //===---------------------------------------------------------------------===// | ||
222 | // Flow and Program control Instructions | ||
223 | //===---------------------------------------------------------------------===// | ||
224 | let isTerminator=1 in { | ||
225 | def SWITCH : ILFormat< (outs), (ins GPRI32:$src), | ||
226 | !strconcat("SWITCH", " $src"), []>; | ||
227 | def CASE : ILFormat< (outs), (ins GPRI32:$src), | ||
228 | !strconcat("CASE", " $src"), []>; | ||
229 | def BREAK : ILFormat< (outs), (ins), | ||
230 | "BREAK", []>; | ||
231 | def CONTINUE : ILFormat< (outs), (ins), | ||
232 | "CONTINUE", []>; | ||
233 | def DEFAULT : ILFormat< (outs), (ins), | ||
234 | "DEFAULT", []>; | ||
235 | def ELSE : ILFormat< (outs), (ins), | ||
236 | "ELSE", []>; | ||
237 | def ENDSWITCH : ILFormat< (outs), (ins), | ||
238 | "ENDSWITCH", []>; | ||
239 | def ENDMAIN : ILFormat< (outs), (ins), | ||
240 | "ENDMAIN", []>; | ||
241 | def END : ILFormat< (outs), (ins), | ||
242 | "END", []>; | ||
243 | def ENDFUNC : ILFormat< (outs), (ins), | ||
244 | "ENDFUNC", []>; | ||
245 | def ENDIF : ILFormat< (outs), (ins), | ||
246 | "ENDIF", []>; | ||
247 | def WHILELOOP : ILFormat< (outs), (ins), | ||
248 | "WHILE", []>; | ||
249 | def ENDLOOP : ILFormat< (outs), (ins), | ||
250 | "ENDLOOP", []>; | ||
251 | def FUNC : ILFormat< (outs), (ins), | ||
252 | "FUNC", []>; | ||
253 | def RETDYN : ILFormat< (outs), (ins), | ||
254 | "RET_DYN", []>; | ||
255 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
256 | defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">; | ||
257 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
258 | defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">; | ||
259 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
260 | defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; | ||
261 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
262 | defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; | ||
263 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
264 | defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; | ||
265 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||
266 | defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; | ||
267 | defm IFC : BranchInstr2<"IFC">; | ||
268 | defm BREAKC : BranchInstr2<"BREAKC">; | ||
269 | defm CONTINUEC : BranchInstr2<"CONTINUEC">; | ||
270 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp deleted file mode 100644 index 23df3822a5a..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp +++ /dev/null | |||
@@ -1,93 +0,0 @@ | |||
1 | //===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the AMDIL Implementation of the IntrinsicInfo class. | ||
11 | // | ||
12 | //===-----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDILIntrinsicInfo.h" | ||
15 | #include "AMDIL.h" | ||
16 | #include "AMDGPUSubtarget.h" | ||
17 | #include "llvm/DerivedTypes.h" | ||
18 | #include "llvm/Intrinsics.h" | ||
19 | #include "llvm/Module.h" | ||
20 | |||
21 | using namespace llvm; | ||
22 | |||
23 | #define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN | ||
24 | #include "AMDGPUGenIntrinsics.inc" | ||
25 | #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN | ||
26 | |||
27 | AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm) | ||
28 | : TargetIntrinsicInfo() | ||
29 | { | ||
30 | } | ||
31 | |||
32 | std::string | ||
33 | AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, | ||
34 | unsigned int numTys) const | ||
35 | { | ||
36 | static const char* const names[] = { | ||
37 | #define GET_INTRINSIC_NAME_TABLE | ||
38 | #include "AMDGPUGenIntrinsics.inc" | ||
39 | #undef GET_INTRINSIC_NAME_TABLE | ||
40 | }; | ||
41 | |||
42 | //assert(!isOverloaded(IntrID) | ||
43 | //&& "AMDGPU Intrinsics are not overloaded"); | ||
44 | if (IntrID < Intrinsic::num_intrinsics) { | ||
45 | return 0; | ||
46 | } | ||
47 | assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics | ||
48 | && "Invalid intrinsic ID"); | ||
49 | |||
50 | std::string Result(names[IntrID - Intrinsic::num_intrinsics]); | ||
51 | return Result; | ||
52 | } | ||
53 | |||
54 | unsigned int | ||
55 | AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const | ||
56 | { | ||
57 | #define GET_FUNCTION_RECOGNIZER | ||
58 | #include "AMDGPUGenIntrinsics.inc" | ||
59 | #undef GET_FUNCTION_RECOGNIZER | ||
60 | AMDGPUIntrinsic::ID IntrinsicID | ||
61 | = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; | ||
62 | IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name); | ||
63 | |||
64 | if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { | ||
65 | return IntrinsicID; | ||
66 | } | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | bool | ||
71 | AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const | ||
72 | { | ||
73 | // Overload Table | ||
74 | #define GET_INTRINSIC_OVERLOAD_TABLE | ||
75 | #include "AMDGPUGenIntrinsics.inc" | ||
76 | #undef GET_INTRINSIC_OVERLOAD_TABLE | ||
77 | } | ||
78 | |||
79 | /// This defines the "getAttributes(ID id)" method. | ||
80 | #define GET_INTRINSIC_ATTRIBUTES | ||
81 | #include "AMDGPUGenIntrinsics.inc" | ||
82 | #undef GET_INTRINSIC_ATTRIBUTES | ||
83 | |||
84 | Function* | ||
85 | AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, | ||
86 | Type **Tys, | ||
87 | unsigned numTys) const | ||
88 | { | ||
89 | //Silence a warning | ||
90 | AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID); | ||
91 | (void)List; | ||
92 | assert(!"Not implemented"); | ||
93 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h deleted file mode 100644 index 9ae3d4dc112..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | //===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the AMDIL Implementation of the Intrinsic Info class. | ||
11 | // | ||
12 | //===-----------------------------------------------------------------------===// | ||
13 | #ifndef _AMDIL_INTRINSICS_H_ | ||
14 | #define _AMDIL_INTRINSICS_H_ | ||
15 | |||
16 | #include "llvm/Intrinsics.h" | ||
17 | #include "llvm/Target/TargetIntrinsicInfo.h" | ||
18 | |||
19 | namespace llvm { | ||
20 | class TargetMachine; | ||
21 | namespace AMDGPUIntrinsic { | ||
22 | enum ID { | ||
23 | last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1, | ||
24 | #define GET_INTRINSIC_ENUM_VALUES | ||
25 | #include "AMDGPUGenIntrinsics.inc" | ||
26 | #undef GET_INTRINSIC_ENUM_VALUES | ||
27 | , num_AMDGPU_intrinsics | ||
28 | }; | ||
29 | |||
30 | } | ||
31 | |||
32 | |||
33 | class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo { | ||
34 | public: | ||
35 | AMDGPUIntrinsicInfo(TargetMachine *tm); | ||
36 | std::string getName(unsigned int IntrId, Type **Tys = 0, | ||
37 | unsigned int numTys = 0) const; | ||
38 | unsigned int lookupName(const char *Name, unsigned int Len) const; | ||
39 | bool isOverloaded(unsigned int IID) const; | ||
40 | Function *getDeclaration(Module *M, unsigned int ID, | ||
41 | Type **Tys = 0, | ||
42 | unsigned int numTys = 0) const; | ||
43 | }; // AMDGPUIntrinsicInfo | ||
44 | } | ||
45 | |||
46 | #endif // _AMDIL_INTRINSICS_H_ | ||
47 | |||
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsics.td b/src/gallium/drivers/radeon/AMDILIntrinsics.td deleted file mode 100644 index 3f9e20f0c85..00000000000 --- a/src/gallium/drivers/radeon/AMDILIntrinsics.td +++ /dev/null | |||
@@ -1,242 +0,0 @@ | |||
1 | //===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file defines all of the amdil-specific intrinsics | ||
11 | // | ||
12 | //===---------------------------------------------------------------===// | ||
13 | //===--------------------------------------------------------------------===// | ||
14 | // Intrinsic classes | ||
15 | // Generic versions of the above classes but for Target specific intrinsics | ||
16 | // instead of SDNode patterns. | ||
17 | //===--------------------------------------------------------------------===// | ||
18 | let TargetPrefix = "AMDIL", isTarget = 1 in { | ||
19 | class VoidIntLong : | ||
20 | Intrinsic<[llvm_i64_ty], [], []>; | ||
21 | class VoidIntInt : | ||
22 | Intrinsic<[llvm_i32_ty], [], []>; | ||
23 | class VoidIntBool : | ||
24 | Intrinsic<[llvm_i32_ty], [], []>; | ||
25 | class UnaryIntInt : | ||
26 | Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; | ||
27 | class UnaryIntFloat : | ||
28 | Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; | ||
29 | class ConvertIntFTOI : | ||
30 | Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; | ||
31 | class ConvertIntITOF : | ||
32 | Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; | ||
33 | class UnaryIntNoRetInt : | ||
34 | Intrinsic<[], [llvm_anyint_ty], []>; | ||
35 | class UnaryIntNoRetFloat : | ||
36 | Intrinsic<[], [llvm_anyfloat_ty], []>; | ||
37 | class BinaryIntInt : | ||
38 | Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; | ||
39 | class BinaryIntFloat : | ||
40 | Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; | ||
41 | class BinaryIntNoRetInt : | ||
42 | Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>; | ||
43 | class BinaryIntNoRetFloat : | ||
44 | Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>; | ||
45 | class TernaryIntInt : | ||
46 | Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, | ||
47 | LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; | ||
48 | class TernaryIntFloat : | ||
49 | Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, | ||
50 | LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; | ||
51 | class QuaternaryIntInt : | ||
52 | Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, | ||
53 | LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; | ||
54 | class UnaryAtomicInt : | ||
55 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; | ||
56 | class BinaryAtomicInt : | ||
57 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; | ||
58 | class TernaryAtomicInt : | ||
59 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; | ||
60 | class UnaryAtomicIntNoRet : | ||
61 | Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; | ||
62 | class BinaryAtomicIntNoRet : | ||
63 | Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; | ||
64 | class TernaryAtomicIntNoRet : | ||
65 | Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; | ||
66 | } | ||
67 | |||
68 | let TargetPrefix = "AMDIL", isTarget = 1 in { | ||
69 | def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; | ||
70 | |||
71 | def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">, | ||
72 | TernaryIntInt; | ||
73 | def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">, | ||
74 | TernaryIntInt; | ||
75 | def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, | ||
76 | UnaryIntInt; | ||
77 | def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, | ||
78 | UnaryIntInt; | ||
79 | def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">, | ||
80 | UnaryIntInt; | ||
81 | def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">, | ||
82 | UnaryIntInt; | ||
83 | def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">, | ||
84 | UnaryIntInt; | ||
85 | def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">, | ||
86 | TernaryIntInt; | ||
87 | def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">, | ||
88 | TernaryIntInt; | ||
89 | def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">, | ||
90 | QuaternaryIntInt; | ||
91 | def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">, | ||
92 | TernaryIntInt; | ||
93 | def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, | ||
94 | BinaryIntInt; | ||
95 | def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, | ||
96 | TernaryIntInt; | ||
97 | def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, | ||
98 | TernaryIntInt; | ||
99 | def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, | ||
100 | TernaryIntFloat; | ||
101 | def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, | ||
102 | BinaryIntInt; | ||
103 | def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, | ||
104 | BinaryIntInt; | ||
105 | def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">, | ||
106 | BinaryIntInt; | ||
107 | def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">, | ||
108 | BinaryIntInt; | ||
109 | def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, | ||
110 | BinaryIntInt; | ||
111 | def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, | ||
112 | BinaryIntInt; | ||
113 | def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, | ||
114 | TernaryIntInt; | ||
115 | def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, | ||
116 | TernaryIntInt; | ||
117 | def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, | ||
118 | BinaryIntInt; | ||
119 | def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, | ||
120 | BinaryIntInt; | ||
121 | def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">, | ||
122 | BinaryIntInt; | ||
123 | def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">, | ||
124 | BinaryIntInt; | ||
125 | def int_AMDIL_min : GCCBuiltin<"__amdil_min">, | ||
126 | BinaryIntFloat; | ||
127 | def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">, | ||
128 | BinaryIntInt; | ||
129 | def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">, | ||
130 | BinaryIntInt; | ||
131 | def int_AMDIL_max : GCCBuiltin<"__amdil_max">, | ||
132 | BinaryIntFloat; | ||
133 | def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">, | ||
134 | TernaryIntInt; | ||
135 | def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">, | ||
136 | TernaryIntInt; | ||
137 | def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">, | ||
138 | TernaryIntInt; | ||
139 | def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">, | ||
140 | UnaryIntFloat; | ||
141 | def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">, | ||
142 | TernaryIntFloat; | ||
143 | def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">, | ||
144 | UnaryIntFloat; | ||
145 | def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">, | ||
146 | UnaryIntFloat; | ||
147 | def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">, | ||
148 | UnaryIntFloat; | ||
149 | def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">, | ||
150 | UnaryIntFloat; | ||
151 | def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">, | ||
152 | UnaryIntFloat; | ||
153 | def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">, | ||
154 | UnaryIntFloat; | ||
155 | def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">, | ||
156 | UnaryIntFloat; | ||
157 | def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">, | ||
158 | UnaryIntFloat; | ||
159 | def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">, | ||
160 | UnaryIntFloat; | ||
161 | def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">, | ||
162 | UnaryIntFloat; | ||
163 | def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">, | ||
164 | UnaryIntFloat; | ||
165 | def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">, | ||
166 | UnaryIntFloat; | ||
167 | def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat; | ||
168 | def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat; | ||
169 | def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt; | ||
170 | def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">, | ||
171 | UnaryIntFloat; | ||
172 | def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">, | ||
173 | UnaryIntFloat; | ||
174 | def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">, | ||
175 | UnaryIntFloat; | ||
176 | def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">, | ||
177 | UnaryIntFloat; | ||
178 | def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">, | ||
179 | UnaryIntFloat; | ||
180 | def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">, | ||
181 | UnaryIntFloat; | ||
182 | def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">, | ||
183 | UnaryIntFloat; | ||
184 | def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">, | ||
185 | UnaryIntFloat; | ||
186 | def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">, | ||
187 | TernaryIntFloat; | ||
188 | def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">, | ||
189 | UnaryIntFloat; | ||
190 | def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">, | ||
191 | UnaryIntFloat; | ||
192 | def int_AMDIL_length : GCCBuiltin<"__amdil_length">, | ||
193 | UnaryIntFloat; | ||
194 | def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">, | ||
195 | TernaryIntFloat; | ||
196 | def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">, | ||
197 | Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, | ||
198 | llvm_v4i32_ty, llvm_i32_ty], []>; | ||
199 | |||
200 | def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">, | ||
201 | Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>; | ||
202 | def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">, | ||
203 | Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>; | ||
204 | def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">, | ||
205 | Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; | ||
206 | def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">, | ||
207 | ConvertIntITOF; | ||
208 | def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">, | ||
209 | ConvertIntFTOI; | ||
210 | def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">, | ||
211 | ConvertIntFTOI; | ||
212 | def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">, | ||
213 | ConvertIntFTOI; | ||
214 | def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">, | ||
215 | ConvertIntFTOI; | ||
216 | def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">, | ||
217 | ConvertIntFTOI; | ||
218 | def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">, | ||
219 | ConvertIntFTOI; | ||
220 | def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">, | ||
221 | Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>; | ||
222 | def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">, | ||
223 | ConvertIntITOF; | ||
224 | def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">, | ||
225 | ConvertIntITOF; | ||
226 | def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">, | ||
227 | ConvertIntITOF; | ||
228 | def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">, | ||
229 | ConvertIntITOF; | ||
230 | def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">, | ||
231 | Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, | ||
232 | llvm_v2f32_ty, llvm_float_ty], []>; | ||
233 | def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">, | ||
234 | Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, | ||
235 | llvm_v2f32_ty], []>; | ||
236 | def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">, | ||
237 | Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, | ||
238 | llvm_v4f32_ty], []>; | ||
239 | def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">, | ||
240 | Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, | ||
241 | llvm_v4f32_ty], []>; | ||
242 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp deleted file mode 100644 index 0ebbc9d1e06..00000000000 --- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp +++ /dev/null | |||
@@ -1,71 +0,0 @@ | |||
1 | //===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #include "AMDILNIDevice.h" | ||
10 | #include "AMDILEvergreenDevice.h" | ||
11 | #include "AMDGPUSubtarget.h" | ||
12 | |||
13 | using namespace llvm; | ||
14 | |||
15 | AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST) | ||
16 | : AMDGPUEvergreenDevice(ST) | ||
17 | { | ||
18 | std::string name = ST->getDeviceName(); | ||
19 | if (name == "caicos") { | ||
20 | mDeviceFlag = OCL_DEVICE_CAICOS; | ||
21 | } else if (name == "turks") { | ||
22 | mDeviceFlag = OCL_DEVICE_TURKS; | ||
23 | } else if (name == "cayman") { | ||
24 | mDeviceFlag = OCL_DEVICE_CAYMAN; | ||
25 | } else { | ||
26 | mDeviceFlag = OCL_DEVICE_BARTS; | ||
27 | } | ||
28 | } | ||
29 | AMDGPUNIDevice::~AMDGPUNIDevice() | ||
30 | { | ||
31 | } | ||
32 | |||
33 | size_t | ||
34 | AMDGPUNIDevice::getMaxLDSSize() const | ||
35 | { | ||
36 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
37 | return MAX_LDS_SIZE_900; | ||
38 | } else { | ||
39 | return 0; | ||
40 | } | ||
41 | } | ||
42 | |||
43 | uint32_t | ||
44 | AMDGPUNIDevice::getGeneration() const | ||
45 | { | ||
46 | return AMDGPUDeviceInfo::HD6XXX; | ||
47 | } | ||
48 | |||
49 | |||
50 | AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST) | ||
51 | : AMDGPUNIDevice(ST) | ||
52 | { | ||
53 | setCaps(); | ||
54 | } | ||
55 | |||
56 | AMDGPUCaymanDevice::~AMDGPUCaymanDevice() | ||
57 | { | ||
58 | } | ||
59 | |||
60 | void | ||
61 | AMDGPUCaymanDevice::setCaps() | ||
62 | { | ||
63 | if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { | ||
64 | mHWBits.set(AMDGPUDeviceInfo::DoubleOps); | ||
65 | mHWBits.set(AMDGPUDeviceInfo::FMA); | ||
66 | } | ||
67 | mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps); | ||
68 | mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps); | ||
69 | mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); | ||
70 | } | ||
71 | |||
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.h b/src/gallium/drivers/radeon/AMDILNIDevice.h deleted file mode 100644 index 387f7d1c3b7..00000000000 --- a/src/gallium/drivers/radeon/AMDILNIDevice.h +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | //===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the subtarget data classes. | ||
11 | // | ||
12 | //===---------------------------------------------------------------------===// | ||
13 | // This file will define the interface that each generation needs to | ||
14 | // implement in order to correctly answer queries on the capabilities of the | ||
15 | // specific hardware. | ||
16 | //===---------------------------------------------------------------------===// | ||
#ifndef _AMDILNIDEVICE_H_
#define _AMDILNIDEVICE_H_
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"

namespace llvm {
class AMDGPUSubtarget;
//===---------------------------------------------------------------------===//
// NI generation of devices and their respective sub classes
//===---------------------------------------------------------------------===//

// AMDGPUNIDevice is the base class for all Northern Islands series of cards.
// It is very similar to AMDGPUEvergreenDevice, with the major exceptions
// being differences in wavefront size and hardware capabilities. The NI
// devices all have 64-wide wavefronts and also add support for signed 24-bit
// integer operations.

class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
public:
  AMDGPUNIDevice(AMDGPUSubtarget*);
  virtual ~AMDGPUNIDevice();
  // Maximum local data share (LDS) size in bytes; 0 when hardware local
  // memory is not in use (see AMDILNIDevice.cpp).
  virtual size_t getMaxLDSSize() const;
  // Always reports AMDGPUDeviceInfo::HD6XXX for this family.
  virtual uint32_t getGeneration() const;
protected:
}; // AMDGPUNIDevice

// Just as AMDGPUCypressDevice is the double-capable version of
// AMDGPUEvergreenDevice, AMDGPUCaymanDevice is the double-capable version of
// AMDGPUNIDevice. The other major difference (less relevant from a capability
// standpoint) is that the Cayman device has 4-wide ALUs, whereas the rest of
// the NI family is 5-wide.

class AMDGPUCaymanDevice: public AMDGPUNIDevice {
public:
  AMDGPUCaymanDevice(AMDGPUSubtarget*);
  virtual ~AMDGPUCaymanDevice();
private:
  // Sets the Cayman-specific capability bits (doubles/FMA, signed 24-bit ops).
  virtual void setCaps();
}; // AMDGPUCaymanDevice

// NI shares the Evergreen (8XX) LDS size limit.
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
} // namespace llvm
#endif // _AMDILNIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp deleted file mode 100644 index f869b332e53..00000000000 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ /dev/null | |||
@@ -1,1275 +0,0 @@ | |||
1 | //===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | |||
10 | #include "AMDILDevices.h" | ||
11 | #include "AMDGPUInstrInfo.h" | ||
12 | #include "llvm/ADT/Statistic.h" | ||
13 | #include "llvm/ADT/StringExtras.h" | ||
14 | #include "llvm/ADT/StringRef.h" | ||
15 | #include "llvm/ADT/Twine.h" | ||
16 | #include "llvm/Constants.h" | ||
17 | #include "llvm/CodeGen/MachineFunction.h" | ||
18 | #include "llvm/CodeGen/MachineFunctionAnalysis.h" | ||
19 | #include "llvm/Function.h" | ||
20 | #include "llvm/Instructions.h" | ||
21 | #include "llvm/Module.h" | ||
22 | #include "llvm/Support/Debug.h" | ||
23 | #include "llvm/Support/MathExtras.h" | ||
24 | |||
25 | #include <sstream> | ||
26 | |||
27 | #if 0 | ||
28 | STATISTIC(PointerAssignments, "Number of dynamic pointer " | ||
29 | "assigments discovered"); | ||
30 | STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); | ||
31 | #endif | ||
32 | |||
33 | using namespace llvm; | ||
34 | // The Peephole optimization pass is used to do simple last minute optimizations | ||
35 | // that are required for correct code or to remove redundant functions | ||
36 | namespace { | ||
37 | |||
38 | class OpaqueType; | ||
39 | |||
// Function pass performing last-minute AMDIL-specific peephole rewrites on
// LLVM IR: atomic "_noret" renaming, bit-insert/extract pattern matching,
// BFI/BFM expansion, 24-bit op expansion, and sampler/constant propagation.
class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
public:
  TargetMachine &TM;  // Target machine; queried for subtarget and opt level.
  static char ID;     // Pass identification (address used as unique ID).
  AMDGPUPeepholeOpt(TargetMachine &tm);
  ~AMDGPUPeepholeOpt();
  const char *getPassName() const;
  bool runOnFunction(Function &F);
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);
  void getAnalysisUsage(AnalysisUsage &AU) const;
protected:
private:
  // Function to initiate all of the instruction level optimizations.
  bool instLevelOptimizations(BasicBlock::iterator *inst);
  // Quick check to see if we need to dump all of the pointers into the
  // arena. If this is correct, then we set all pointers to exist in arena. This
  // is a workaround for aliasing of pointers in a struct/union.
  bool dumpAllIntoArena(Function &F);
  // Because I don't want to invalidate any pointers while in the
  // safeNestedForEach function, I push atomic conversions to a vector and
  // handle them later. This function does the conversions if required.
  void doAtomicConversionIfNeeded(Function &F);
  // Because __amdil_is_constant cannot be properly evaluated if
  // optimizations are disabled, the calls are placed in a vector
  // and evaluated after the __amdil_image* functions are evaluated,
  // which should allow the __amdil_is_constant function to be
  // evaluated correctly.
  void doIsConstCallConversionIfNeeded();
  bool mChanged;         // Set when this pass modifies the function's IR.
  bool mDebug;           // Enables verbose dbgs() tracing (off by default).
  bool mConvertAtomics;  // Cleared when a global "__atom*_g*" call is seen.
  CodeGenOpt::Level optLevel;  // Cached from the TargetMachine at construction.
  // Run a series of tests to see if we can optimize a CALL instruction.
  bool optimizeCallInst(BasicBlock::iterator *bbb);
  // A peephole optimization to optimize bit extract sequences.
  bool optimizeBitExtract(Instruction *inst);
  // A peephole optimization to optimize bit insert sequences.
  bool optimizeBitInsert(Instruction *inst);
  // Extract the (src, mask, shift) components from one side of a potential
  // bit-insert OR pattern; returns false when the shape does not match.
  bool setupBitInsert(Instruction *base,
                      Instruction *&src,
                      Constant *&mask,
                      Constant *&shift);
  // Expand the bit field insert instruction on versions of OpenCL that
  // don't support it.
  bool expandBFI(CallInst *CI);
  // Expand the bit field mask instruction on versions of OpenCL that
  // don't support it.
  bool expandBFM(CallInst *CI);
  // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
  // this case we need to expand them. These functions check for 24bit functions
  // and then expand.
  bool isSigned24BitOps(CallInst *CI);
  void expandSigned24BitOps(CallInst *CI);
  // One optimization that can occur is that if the required workgroup size is
  // specified then the result of get_local_size is known at compile time and
  // can be returned accordingly.
  bool isRWGLocalOpt(CallInst *CI);
  // On northern island cards, the division is slightly less accurate than on
  // previous generations, so we need to utilize a more accurate division. So we
  // can translate the accurate divide to a normal divide on all other cards.
  bool convertAccurateDivide(CallInst *CI);
  void expandAccurateDivide(CallInst *CI);
  // If the alignment is set incorrectly, it can produce really inefficient
  // code. This checks for this scenario and fixes it if possible.
  bool correctMisalignedMemOp(Instruction *inst);

  // If we are in no opt mode, then we need to make sure that
  // local samplers are properly propagated as constant propagation
  // doesn't occur and we need to know the value of kernel defined
  // samplers at compile time.
  bool propagateSamplerInst(CallInst *CI);

  // Helper functions

  // Group of functions that recursively calculate the size of a structure
  // based on its sub-types.
  size_t getTypeSize(Type * const T, bool dereferencePtr = false);
  size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
  size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
  size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
  size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
  size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
  size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
  size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);

  LLVMContext *mCTX;             // Context of the function being processed.
  Function *mF;                  // Function currently being processed.
  const AMDGPUSubtarget *mSTM;   // Subtarget, for device capability queries.
  // Deferred atomic-call rewrites: (call, replacement "_noret" function).
  SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
  // Deferred __amdil_is_constant calls (only populated at -O0).
  SmallVector<CallInst *, 16> isConstVec;
}; // class AMDGPUPeepholeOpt
char AMDGPUPeepholeOpt::ID = 0;
133 | |||
// Visit every element of a two-level iterator hierarchy, invoking F with a
// pointer to the current inner iterator. F returns true when it has already
// advanced (or erased at) the iterator itself; otherwise this loop advances
// it. This protocol keeps iteration safe when F erases the current element.
// The third argument only exists to fix the inner-iterator template
// parameter; its value is never read.
template <class OuterIter, class InnerIter, class Callback>
Callback safeNestedForEach(OuterIter First, OuterIter Last,
                           InnerIter S, Callback F) {
  while (First != Last) {
    InnerIter cur = First->begin();
    InnerIter stop = First->end();
    while (cur != stop) {
      const bool alreadyAdvanced = F(&cur);
      if (!alreadyAdvanced) {
        ++cur;
      }
    }
    ++First;
  }
  return F;
}
151 | |||
152 | } // anonymous namespace | ||
153 | |||
154 | namespace llvm { | ||
155 | FunctionPass * | ||
156 | createAMDGPUPeepholeOpt(TargetMachine &tm) | ||
157 | { | ||
158 | return new AMDGPUPeepholeOpt(tm); | ||
159 | } | ||
160 | } // llvm namespace | ||
161 | |||
162 | AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) | ||
163 | : FunctionPass(ID), TM(tm) | ||
164 | { | ||
165 | mDebug = false; | ||
166 | optLevel = TM.getOptLevel(); | ||
167 | |||
168 | } | ||
169 | |||
170 | AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() | ||
171 | { | ||
172 | } | ||
173 | |||
174 | const char * | ||
175 | AMDGPUPeepholeOpt::getPassName() const | ||
176 | { | ||
177 | return "AMDGPU PeepHole Optimization Pass"; | ||
178 | } | ||
179 | |||
180 | bool | ||
181 | containsPointerType(Type *Ty) | ||
182 | { | ||
183 | if (!Ty) { | ||
184 | return false; | ||
185 | } | ||
186 | switch(Ty->getTypeID()) { | ||
187 | default: | ||
188 | return false; | ||
189 | case Type::StructTyID: { | ||
190 | const StructType *ST = dyn_cast<StructType>(Ty); | ||
191 | for (StructType::element_iterator stb = ST->element_begin(), | ||
192 | ste = ST->element_end(); stb != ste; ++stb) { | ||
193 | if (!containsPointerType(*stb)) { | ||
194 | continue; | ||
195 | } | ||
196 | return true; | ||
197 | } | ||
198 | break; | ||
199 | } | ||
200 | case Type::VectorTyID: | ||
201 | case Type::ArrayTyID: | ||
202 | return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); | ||
203 | case Type::PointerTyID: | ||
204 | return true; | ||
205 | }; | ||
206 | return false; | ||
207 | } | ||
208 | |||
209 | bool | ||
210 | AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) | ||
211 | { | ||
212 | bool dumpAll = false; | ||
213 | for (Function::const_arg_iterator cab = F.arg_begin(), | ||
214 | cae = F.arg_end(); cab != cae; ++cab) { | ||
215 | const Argument *arg = cab; | ||
216 | const PointerType *PT = dyn_cast<PointerType>(arg->getType()); | ||
217 | if (!PT) { | ||
218 | continue; | ||
219 | } | ||
220 | Type *DereferencedType = PT->getElementType(); | ||
221 | if (!dyn_cast<StructType>(DereferencedType) | ||
222 | ) { | ||
223 | continue; | ||
224 | } | ||
225 | if (!containsPointerType(DereferencedType)) { | ||
226 | continue; | ||
227 | } | ||
228 | // FIXME: Because a pointer inside of a struct/union may be aliased to | ||
229 | // another pointer we need to take the conservative approach and place all | ||
230 | // pointers into the arena until more advanced detection is implemented. | ||
231 | dumpAll = true; | ||
232 | } | ||
233 | return dumpAll; | ||
234 | } | ||
235 | void | ||
236 | AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() | ||
237 | { | ||
238 | if (isConstVec.empty()) { | ||
239 | return; | ||
240 | } | ||
241 | for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { | ||
242 | CallInst *CI = isConstVec[x]; | ||
243 | Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); | ||
244 | Type *aType = Type::getInt32Ty(*mCTX); | ||
245 | Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) | ||
246 | : ConstantInt::get(aType, 0); | ||
247 | CI->replaceAllUsesWith(Val); | ||
248 | CI->eraseFromParent(); | ||
249 | } | ||
250 | isConstVec.clear(); | ||
251 | } | ||
252 | void | ||
253 | AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) | ||
254 | { | ||
255 | // Don't do anything if we don't have any atomic operations. | ||
256 | if (atomicFuncs.empty()) { | ||
257 | return; | ||
258 | } | ||
259 | // Change the function name for the atomic if it is required | ||
260 | uint32_t size = atomicFuncs.size(); | ||
261 | for (uint32_t x = 0; x < size; ++x) { | ||
262 | atomicFuncs[x].first->setOperand( | ||
263 | atomicFuncs[x].first->getNumOperands()-1, | ||
264 | atomicFuncs[x].second); | ||
265 | |||
266 | } | ||
267 | mChanged = true; | ||
268 | if (mConvertAtomics) { | ||
269 | return; | ||
270 | } | ||
271 | } | ||
272 | |||
// Entry point: walk every instruction of the function applying the peephole
// rewrites, then perform the deferred atomic and is_constant conversions.
// Returns true when any IR was modified.
bool
AMDGPUPeepholeOpt::runOnFunction(Function &MF)
{
  mChanged = false;
  mF = &MF;
  mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
  if (mDebug) {
    MF.dump();
  }
  mCTX = &MF.getType()->getContext();
  mConvertAtomics = true;
  // Visit each instruction of each basic block. instLevelOptimizations
  // returns true when it already advanced/erased the iterator, which is the
  // contract safeNestedForEach relies on to iterate safely during erasure.
  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
     std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
                  this));

  // Deferred rewrites run after the walk so no iterators are invalidated
  // mid-traversal (see doAtomicConversionIfNeeded's rationale in the class).
  doAtomicConversionIfNeeded(MF);
  doIsConstCallConversionIfNeeded();

  if (mDebug) {
    MF.dump();
  }
  return mChanged;
}
296 | |||
// Attempt the call-specific peephole rewrites on the instruction at *bbb.
// Return protocol: true means this function already advanced the iterator
// (and usually erased the call), so the caller must NOT advance it again;
// false means the iterator is untouched.
bool
AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
{
  Instruction *inst = (*bbb);
  CallInst *CI = dyn_cast<CallInst>(inst);
  if (!CI) {
    return false;
  }
  // Each expansion below advances the iterator past the call BEFORE erasing
  // it, keeping the caller's traversal valid.
  if (isSigned24BitOps(CI)) {
    expandSigned24BitOps(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (propagateSamplerInst(CI)) {
    return false;
  }
  if (expandBFI(CI) || expandBFM(CI)) {
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (convertAccurateDivide(CI)) {
    expandAccurateDivide(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }

  // The callee is the call's last operand.
  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
  if (calleeName.startswith("__amdil_is_constant")) {
    // If we do not have optimizations, then this
    // cannot be properly evaluated, so we add the
    // call instruction to a vector and process
    // them at the end of processing after the
    // samplers have been correctly handled.
    if (optLevel == CodeGenOpt::None) {
      isConstVec.push_back(CI);
      return false;
    } else {
      // With optimizations on, fold to 1 if the operand is a Constant,
      // 0 otherwise, and erase the call.
      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
      Type *aType = Type::getInt32Ty(*mCTX);
      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
                                : ConstantInt::get(aType, 0);
      CI->replaceAllUsesWith(Val);
      ++(*bbb);
      CI->eraseFromParent();
      return true;
    }
  }

  if (calleeName.equals("__amdil_is_asic_id_i32")) {
    // Fold the ASIC-id query to (deviceFlag & mask) when the mask is a
    // constant; a non-constant mask folds to 0.
    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
    Type *aType = Type::getInt32Ty(*mCTX);
    Value *Val = CV;
    if (Val) {
      Val = ConstantInt::get(aType,
          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
    } else {
      Val = ConstantInt::get(aType, 0);
    }
    CI->replaceAllUsesWith(Val);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
  if (!F) {
    return false;
  }
  // Atomics whose result is unused (except exchanges) can target the cheaper
  // "_noret" variants; record the rewrite for after the traversal.
  if (F->getName().startswith("__atom") && !CI->getNumUses()
      && F->getName().find("_xchg") == StringRef::npos) {
    std::string buffer(F->getName().str() + "_noret");
    F = dyn_cast<Function>(
          F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
    atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
  }

  if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
      && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
    return false;
  }
  if (!mConvertAtomics) {
    return false;
  }
  // Seeing a global-memory atomic disables atomic conversion for the rest of
  // the function.
  StringRef name = F->getName();
  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
    mConvertAtomics = false;
  }
  return false;
}
388 | |||
// Decompose one side of a potential bit-insert OR into (src, mask, shift).
// `base` is expected to be either `src & mask`, `src << shift`, or
// `(src & mask) << shift` / `(src << shift) & ...` one level deep. On success
// the out-parameters hold whichever components were found (a missing mask or
// shift is left untouched, i.e. NULL from the caller's initialization).
// Returns false when the shape does not match.
bool
AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
    Instruction *&src,
    Constant *&mask,
    Constant *&shift)
{
  if (!base) {
    if (mDebug) {
      dbgs() << "Null pointer passed into function.\n";
    }
    return false;
  }
  bool andOp = false;
  // The top-level operation must be a shift-left or an and; note the operand
  // may still be NULL after dyn_cast if it is not a Constant.
  if (base->getOpcode() == Instruction::Shl) {
    shift = dyn_cast<Constant>(base->getOperand(1));
  } else if (base->getOpcode() == Instruction::And) {
    mask = dyn_cast<Constant>(base->getOperand(1));
    andOp = true;
  } else {
    if (mDebug) {
      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
    }
    // If the base is neither a Shl or a And, we don't fit any of the patterns above.
    return false;
  }
  src = dyn_cast<Instruction>(base->getOperand(0));
  if (!src) {
    if (mDebug) {
      dbgs() << "Failed setup since the base operand is not an instruction!\n";
    }
    return false;
  }
  // If we find an 'and' operation, then we don't need to
  // find the next operation as we already know the
  // bits that are valid at this point.
  if (andOp) {
    return true;
  }
  // Base was a Shl: peel one more level looking for the mask (or a nested
  // shift when none was found yet).
  if (src->getOpcode() == Instruction::Shl && !shift) {
    shift = dyn_cast<Constant>(src->getOperand(1));
    src = dyn_cast<Instruction>(src->getOperand(0));
  } else if (src->getOpcode() == Instruction::And && !mask) {
    mask = dyn_cast<Constant>(src->getOperand(1));
  }
  if (!mask && !shift) {
    if (mDebug) {
      dbgs() << "Failed setup since both mask and shift are NULL!\n";
    }
    // Did not find a constant mask or a shift.
    return false;
  }
  return true;
}
// Recognize an OR of two masked/shifted values that is equivalent to the
// single __amdil_ubit_insert instruction, and rewrite it into a call to that
// builtin. Returns true when the rewrite happened (inst's uses now point to
// the new call; the caller's framework removes the dead OR later, if ever).
bool
AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::Or) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do an optimization on a sequence of ops that in the end equals a
  // single ISA instruction.
  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
  // Some simplified versions of this pattern are as follows:
  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
  // (A & B) | (D << F) when (1 << F) >= B
  // (A << C) | (D & E) when (1 << C) >= E
  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
    // The HD4XXX hardware doesn't support the ubit_insert instruction.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This optimization only works on 32bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in a intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  // TODO: Handle vectors.
  if (isVector) {
    if (mDebug) {
      dbgs() << "!!! Vectors are not supported yet!\n";
    }
    return false;
  }
  // Decompose each side of the OR into (src, mask, shift); any component not
  // present stays NULL.
  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
  Constant *LHSMask = NULL, *RHSMask = NULL;
  Constant *LHSShift = NULL, *RHSShift = NULL;
  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (LHS) { LHS->dump(); }
      if (LHSSrc) { LHSSrc->dump(); }
      if (LHSMask) { LHSMask->dump(); }
      if (LHSShift) { LHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (RHS) { RHS->dump(); }
      if (RHSSrc) { RHSSrc->dump(); }
      if (RHSMask) { RHSMask->dump(); }
      if (RHSShift) { RHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (mDebug) {
    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
    dbgs() << "Op: "; inst->dump();
    dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
  }
  // Derive numeric mask/shift values, then the bit-field width and offset of
  // each side. A missing mask means "everything above the shift".
  Constant *offset = NULL;
  Constant *width = NULL;
  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
  lhsMaskVal = (int32_t)(LHSMask
      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
  rhsMaskVal = (int32_t)(RHSMask
      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
  lhsShiftVal = (int32_t)(LHSShift
      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
  rhsShiftVal = (int32_t)(RHSShift
      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
  if (mDebug) {
    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
    dbgs() << (RHSMask ? " & E)" : ")");
    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
    dbgs() << "width(B) = " << lhsMaskWidth;
    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
    dbgs() << "offset(B) = " << lhsMaskOffset;
    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
    dbgs() << "Constraints: \n";
    dbgs() << "\t(1) B ^ E == 0\n";
    dbgs() << "\t(2-LHS) B is a mask\n";
    dbgs() << "\t(2-LHS) E is a mask\n";
    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
  }
  // Constraint 1: the two masks must not overlap (XOR of zero means one side
  // fully shadows the other).
  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
    if (mDebug) {
      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
      dbgs() << "Failed constraint 1!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "LHS = " << lhsMaskOffset << "";
    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
    dbgs() << "\nRHS = " << rhsMaskOffset << "";
    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
    dbgs() << "\n";
  }
  // Constraint 3: decide which side is the inserted field (the one whose bits
  // sit entirely above the other's); constraint 2 (the value really is a
  // mask) is checked inside the chosen branch.
  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
    offset = ConstantInt::get(aType, lhsMaskOffset, false);
    width = ConstantInt::get(aType, lhsMaskWidth, false);
    RHSSrc = RHS;
    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    // ubit_insert takes the unshifted field value; shift it back down unless
    // the existing shift already equals the field offset.
    if (!LHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    } else if (lhsShiftVal != lhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing LHS!\n";
    }
  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
    offset = ConstantInt::get(aType, rhsMaskOffset, false);
    width = ConstantInt::get(aType, rhsMaskWidth, false);
    LHSSrc = RHSSrc;
    RHSSrc = LHS;
    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!RHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    } else if (rhsShiftVal != rhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing RHS!\n";
    }
  } else {
    if (mDebug) {
      dbgs() << "Failed constraint 3!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
  }
  if (!offset || !width) {
    if (mDebug) {
      dbgs() << "Either width or offset are NULL, failed detection!\n";
    }
    return false;
  }
  // Lets create the function signature.
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_insert";
  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
  Function *Func =
    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
        getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[4] = {
    width,
    offset,
    LHSSrc,
    RHSSrc
  };
  // Replace the OR with a call to the builtin; uses are redirected, and the
  // now-dead OR is left for later cleanup.
  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
  if (mDebug) {
    dbgs() << "Old Inst: ";
    inst->dump();
    dbgs() << "New Inst: ";
    CI->dump();
    dbgs() << "\n\n";
  }
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}
681 | |||
682 | bool | ||
683 | AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) | ||
684 | { | ||
685 | if (!inst) { | ||
686 | return false; | ||
687 | } | ||
688 | if (!inst->isBinaryOp()) { | ||
689 | return false; | ||
690 | } | ||
691 | if (inst->getOpcode() != Instruction::And) { | ||
692 | return false; | ||
693 | } | ||
694 | if (optLevel == CodeGenOpt::None) { | ||
695 | return false; | ||
696 | } | ||
697 | // We want to do some simple optimizations on Shift right/And patterns. The | ||
698 | // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a | ||
699 | // value smaller than 32 and C is a mask. If C is a constant value, then the | ||
700 | // following transformation can occur. For signed integers, it turns into the | ||
701 | // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned | ||
702 | // integers, it turns into the function call dst = | ||
703 | // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract | ||
704 | // can be found in Section 7.9 of the ATI IL spec of the stream SDK for | ||
705 | // Evergreen hardware. | ||
706 | if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { | ||
707 | // This does not work on HD4XXX hardware. | ||
708 | return false; | ||
709 | } | ||
710 | Type *aType = inst->getType(); | ||
711 | bool isVector = aType->isVectorTy(); | ||
712 | |||
713 | // XXX Support vector types | ||
714 | if (isVector) { | ||
715 | return false; | ||
716 | } | ||
717 | int numEle = 1; | ||
718 | // This only works on 32bit integers | ||
719 | if (aType->getScalarType() | ||
720 | != Type::getInt32Ty(inst->getContext())) { | ||
721 | return false; | ||
722 | } | ||
723 | if (isVector) { | ||
724 | const VectorType *VT = dyn_cast<VectorType>(aType); | ||
725 | numEle = VT->getNumElements(); | ||
726 | // We currently cannot support more than 4 elements in a intrinsic and we | ||
727 | // cannot support Vec3 types. | ||
728 | if (numEle > 4 || numEle == 3) { | ||
729 | return false; | ||
730 | } | ||
731 | } | ||
732 | BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); | ||
733 | // If the first operand is not a shift instruction, then we can return as it | ||
734 | // doesn't match this pattern. | ||
735 | if (!ShiftInst || !ShiftInst->isShift()) { | ||
736 | return false; | ||
737 | } | ||
738 | // If we are a shift left, then we need don't match this pattern. | ||
739 | if (ShiftInst->getOpcode() == Instruction::Shl) { | ||
740 | return false; | ||
741 | } | ||
742 | bool isSigned = ShiftInst->isArithmeticShift(); | ||
743 | Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); | ||
744 | Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); | ||
745 | // Lets make sure that the shift value and the and mask are constant integers. | ||
746 | if (!AndMask || !ShrVal) { | ||
747 | return false; | ||
748 | } | ||
749 | Constant *newMaskConst; | ||
750 | Constant *shiftValConst; | ||
751 | if (isVector) { | ||
752 | // Handle the vector case | ||
753 | std::vector<Constant *> maskVals; | ||
754 | std::vector<Constant *> shiftVals; | ||
755 | ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); | ||
756 | ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); | ||
757 | Type *scalarType = AndMaskVec->getType()->getScalarType(); | ||
758 | assert(AndMaskVec->getNumOperands() == | ||
759 | ShrValVec->getNumOperands() && "cannot have a " | ||
760 | "combination where the number of elements to a " | ||
761 | "shift and an and are different!"); | ||
762 | for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { | ||
763 | ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); | ||
764 | ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); | ||
765 | if (!AndCI || !ShiftIC) { | ||
766 | return false; | ||
767 | } | ||
768 | uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); | ||
769 | if (!isMask_32(maskVal)) { | ||
770 | return false; | ||
771 | } | ||
772 | maskVal = (uint32_t)CountTrailingOnes_32(maskVal); | ||
773 | uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); | ||
774 | // If the mask or shiftval is greater than the bitcount, then break out. | ||
775 | if (maskVal >= 32 || shiftVal >= 32) { | ||
776 | return false; | ||
777 | } | ||
778 | // If the mask val is greater than the the number of original bits left | ||
779 | // then this optimization is invalid. | ||
780 | if (maskVal > (32 - shiftVal)) { | ||
781 | return false; | ||
782 | } | ||
783 | maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); | ||
784 | shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); | ||
785 | } | ||
786 | newMaskConst = ConstantVector::get(maskVals); | ||
787 | shiftValConst = ConstantVector::get(shiftVals); | ||
788 | } else { | ||
789 | // Handle the scalar case | ||
790 | uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); | ||
791 | // This must be a mask value where all lower bits are set to 1 and then any | ||
792 | // bit higher is set to 0. | ||
793 | if (!isMask_32(maskVal)) { | ||
794 | return false; | ||
795 | } | ||
796 | maskVal = (uint32_t)CountTrailingOnes_32(maskVal); | ||
797 | // Count the number of bits set in the mask, this is the width of the | ||
798 | // resulting bit set that is extracted from the source value. | ||
799 | uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); | ||
800 | // If the mask or shift val is greater than the bitcount, then break out. | ||
801 | if (maskVal >= 32 || shiftVal >= 32) { | ||
802 | return false; | ||
803 | } | ||
804 | // If the mask val is greater than the the number of original bits left then | ||
805 | // this optimization is invalid. | ||
806 | if (maskVal > (32 - shiftVal)) { | ||
807 | return false; | ||
808 | } | ||
809 | newMaskConst = ConstantInt::get(aType, maskVal, isSigned); | ||
810 | shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); | ||
811 | } | ||
812 | // Lets create the function signature. | ||
813 | std::vector<Type *> callTypes; | ||
814 | callTypes.push_back(aType); | ||
815 | callTypes.push_back(aType); | ||
816 | callTypes.push_back(aType); | ||
817 | FunctionType *funcType = FunctionType::get(aType, callTypes, false); | ||
818 | std::string name = "llvm.AMDIL.bit.extract.u32"; | ||
819 | if (isVector) { | ||
820 | name += ".v" + itostr(numEle) + "i32"; | ||
821 | } else { | ||
822 | name += "."; | ||
823 | } | ||
824 | // Lets create the function. | ||
825 | Function *Func = | ||
826 | dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> | ||
827 | getOrInsertFunction(llvm::StringRef(name), funcType)); | ||
828 | Value *Operands[3] = { | ||
829 | ShiftInst->getOperand(0), | ||
830 | shiftValConst, | ||
831 | newMaskConst | ||
832 | }; | ||
833 | // Lets create the Call with the operands | ||
834 | CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); | ||
835 | CI->setDoesNotAccessMemory(); | ||
836 | CI->insertBefore(inst); | ||
837 | inst->replaceAllUsesWith(CI); | ||
838 | return true; | ||
839 | } | ||
840 | |||
841 | bool | ||
842 | AMDGPUPeepholeOpt::expandBFI(CallInst *CI) | ||
843 | { | ||
844 | if (!CI) { | ||
845 | return false; | ||
846 | } | ||
847 | Value *LHS = CI->getOperand(CI->getNumOperands() - 1); | ||
848 | if (!LHS->getName().startswith("__amdil_bfi")) { | ||
849 | return false; | ||
850 | } | ||
851 | Type* type = CI->getOperand(0)->getType(); | ||
852 | Constant *negOneConst = NULL; | ||
853 | if (type->isVectorTy()) { | ||
854 | std::vector<Constant *> negOneVals; | ||
855 | negOneConst = ConstantInt::get(CI->getContext(), | ||
856 | APInt(32, StringRef("-1"), 10)); | ||
857 | for (size_t x = 0, | ||
858 | y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { | ||
859 | negOneVals.push_back(negOneConst); | ||
860 | } | ||
861 | negOneConst = ConstantVector::get(negOneVals); | ||
862 | } else { | ||
863 | negOneConst = ConstantInt::get(CI->getContext(), | ||
864 | APInt(32, StringRef("-1"), 10)); | ||
865 | } | ||
866 | // __amdil_bfi => (A & B) | (~A & C) | ||
867 | BinaryOperator *lhs = | ||
868 | BinaryOperator::Create(Instruction::And, CI->getOperand(0), | ||
869 | CI->getOperand(1), "bfi_and", CI); | ||
870 | BinaryOperator *rhs = | ||
871 | BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, | ||
872 | "bfi_not", CI); | ||
873 | rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), | ||
874 | "bfi_and", CI); | ||
875 | lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); | ||
876 | CI->replaceAllUsesWith(lhs); | ||
877 | return true; | ||
878 | } | ||
879 | |||
880 | bool | ||
881 | AMDGPUPeepholeOpt::expandBFM(CallInst *CI) | ||
882 | { | ||
883 | if (!CI) { | ||
884 | return false; | ||
885 | } | ||
886 | Value *LHS = CI->getOperand(CI->getNumOperands() - 1); | ||
887 | if (!LHS->getName().startswith("__amdil_bfm")) { | ||
888 | return false; | ||
889 | } | ||
890 | // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) | ||
891 | Constant *newMaskConst = NULL; | ||
892 | Constant *newShiftConst = NULL; | ||
893 | Type* type = CI->getOperand(0)->getType(); | ||
894 | if (type->isVectorTy()) { | ||
895 | std::vector<Constant*> newMaskVals, newShiftVals; | ||
896 | newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); | ||
897 | newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); | ||
898 | for (size_t x = 0, | ||
899 | y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { | ||
900 | newMaskVals.push_back(newMaskConst); | ||
901 | newShiftVals.push_back(newShiftConst); | ||
902 | } | ||
903 | newMaskConst = ConstantVector::get(newMaskVals); | ||
904 | newShiftConst = ConstantVector::get(newShiftVals); | ||
905 | } else { | ||
906 | newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); | ||
907 | newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); | ||
908 | } | ||
909 | BinaryOperator *lhs = | ||
910 | BinaryOperator::Create(Instruction::And, CI->getOperand(0), | ||
911 | newMaskConst, "bfm_mask", CI); | ||
912 | lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, | ||
913 | lhs, "bfm_shl", CI); | ||
914 | lhs = BinaryOperator::Create(Instruction::Sub, lhs, | ||
915 | newShiftConst, "bfm_sub", CI); | ||
916 | BinaryOperator *rhs = | ||
917 | BinaryOperator::Create(Instruction::And, CI->getOperand(1), | ||
918 | newMaskConst, "bfm_mask", CI); | ||
919 | lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); | ||
920 | CI->replaceAllUsesWith(lhs); | ||
921 | return true; | ||
922 | } | ||
923 | |||
924 | bool | ||
925 | AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) | ||
926 | { | ||
927 | Instruction *inst = (*bbb); | ||
928 | if (optimizeCallInst(bbb)) { | ||
929 | return true; | ||
930 | } | ||
931 | if (optimizeBitExtract(inst)) { | ||
932 | return false; | ||
933 | } | ||
934 | if (optimizeBitInsert(inst)) { | ||
935 | return false; | ||
936 | } | ||
937 | if (correctMisalignedMemOp(inst)) { | ||
938 | return false; | ||
939 | } | ||
940 | return false; | ||
941 | } | ||
942 | bool | ||
943 | AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) | ||
944 | { | ||
945 | LoadInst *linst = dyn_cast<LoadInst>(inst); | ||
946 | StoreInst *sinst = dyn_cast<StoreInst>(inst); | ||
947 | unsigned alignment; | ||
948 | Type* Ty = inst->getType(); | ||
949 | if (linst) { | ||
950 | alignment = linst->getAlignment(); | ||
951 | Ty = inst->getType(); | ||
952 | } else if (sinst) { | ||
953 | alignment = sinst->getAlignment(); | ||
954 | Ty = sinst->getValueOperand()->getType(); | ||
955 | } else { | ||
956 | return false; | ||
957 | } | ||
958 | unsigned size = getTypeSize(Ty); | ||
959 | if (size == alignment || size < alignment) { | ||
960 | return false; | ||
961 | } | ||
962 | if (!Ty->isStructTy()) { | ||
963 | return false; | ||
964 | } | ||
965 | if (alignment < 4) { | ||
966 | if (linst) { | ||
967 | linst->setAlignment(0); | ||
968 | return true; | ||
969 | } else if (sinst) { | ||
970 | sinst->setAlignment(0); | ||
971 | return true; | ||
972 | } | ||
973 | } | ||
974 | return false; | ||
975 | } | ||
976 | bool | ||
977 | AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) | ||
978 | { | ||
979 | if (!CI) { | ||
980 | return false; | ||
981 | } | ||
982 | Value *LHS = CI->getOperand(CI->getNumOperands() - 1); | ||
983 | std::string namePrefix = LHS->getName().substr(0, 14); | ||
984 | if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" | ||
985 | && namePrefix != "__amdil__imul24_high") { | ||
986 | return false; | ||
987 | } | ||
988 | if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { | ||
989 | return false; | ||
990 | } | ||
991 | return true; | ||
992 | } | ||
993 | |||
994 | void | ||
995 | AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) | ||
996 | { | ||
997 | assert(isSigned24BitOps(CI) && "Must be a " | ||
998 | "signed 24 bit operation to call this function!"); | ||
999 | Value *LHS = CI->getOperand(CI->getNumOperands()-1); | ||
1000 | // On 7XX and 8XX we do not have signed 24bit, so we need to | ||
1001 | // expand it to the following: | ||
1002 | // imul24 turns into 32bit imul | ||
1003 | // imad24 turns into 32bit imad | ||
1004 | // imul24_high turns into 32bit imulhigh | ||
1005 | if (LHS->getName().substr(0, 14) == "__amdil_imad24") { | ||
1006 | Type *aType = CI->getOperand(0)->getType(); | ||
1007 | bool isVector = aType->isVectorTy(); | ||
1008 | int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; | ||
1009 | std::vector<Type*> callTypes; | ||
1010 | callTypes.push_back(CI->getOperand(0)->getType()); | ||
1011 | callTypes.push_back(CI->getOperand(1)->getType()); | ||
1012 | callTypes.push_back(CI->getOperand(2)->getType()); | ||
1013 | FunctionType *funcType = | ||
1014 | FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); | ||
1015 | std::string name = "__amdil_imad"; | ||
1016 | if (isVector) { | ||
1017 | name += "_v" + itostr(numEle) + "i32"; | ||
1018 | } else { | ||
1019 | name += "_i32"; | ||
1020 | } | ||
1021 | Function *Func = dyn_cast<Function>( | ||
1022 | CI->getParent()->getParent()->getParent()-> | ||
1023 | getOrInsertFunction(llvm::StringRef(name), funcType)); | ||
1024 | Value *Operands[3] = { | ||
1025 | CI->getOperand(0), | ||
1026 | CI->getOperand(1), | ||
1027 | CI->getOperand(2) | ||
1028 | }; | ||
1029 | CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); | ||
1030 | nCI->insertBefore(CI); | ||
1031 | CI->replaceAllUsesWith(nCI); | ||
1032 | } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { | ||
1033 | BinaryOperator *mulOp = | ||
1034 | BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), | ||
1035 | CI->getOperand(1), "imul24", CI); | ||
1036 | CI->replaceAllUsesWith(mulOp); | ||
1037 | } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { | ||
1038 | Type *aType = CI->getOperand(0)->getType(); | ||
1039 | |||
1040 | bool isVector = aType->isVectorTy(); | ||
1041 | int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; | ||
1042 | std::vector<Type*> callTypes; | ||
1043 | callTypes.push_back(CI->getOperand(0)->getType()); | ||
1044 | callTypes.push_back(CI->getOperand(1)->getType()); | ||
1045 | FunctionType *funcType = | ||
1046 | FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); | ||
1047 | std::string name = "__amdil_imul_high"; | ||
1048 | if (isVector) { | ||
1049 | name += "_v" + itostr(numEle) + "i32"; | ||
1050 | } else { | ||
1051 | name += "_i32"; | ||
1052 | } | ||
1053 | Function *Func = dyn_cast<Function>( | ||
1054 | CI->getParent()->getParent()->getParent()-> | ||
1055 | getOrInsertFunction(llvm::StringRef(name), funcType)); | ||
1056 | Value *Operands[2] = { | ||
1057 | CI->getOperand(0), | ||
1058 | CI->getOperand(1) | ||
1059 | }; | ||
1060 | CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); | ||
1061 | nCI->insertBefore(CI); | ||
1062 | CI->replaceAllUsesWith(nCI); | ||
1063 | } | ||
1064 | } | ||
1065 | |||
1066 | bool | ||
1067 | AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) | ||
1068 | { | ||
1069 | return (CI != NULL | ||
1070 | && CI->getOperand(CI->getNumOperands() - 1)->getName() | ||
1071 | == "__amdil_get_local_size_int"); | ||
1072 | } | ||
1073 | |||
1074 | bool | ||
1075 | AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) | ||
1076 | { | ||
1077 | if (!CI) { | ||
1078 | return false; | ||
1079 | } | ||
1080 | if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX | ||
1081 | && (mSTM->getDeviceName() == "cayman")) { | ||
1082 | return false; | ||
1083 | } | ||
1084 | return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) | ||
1085 | == "__amdil_improved_div"; | ||
1086 | } | ||
1087 | |||
1088 | void | ||
1089 | AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) | ||
1090 | { | ||
1091 | assert(convertAccurateDivide(CI) | ||
1092 | && "expanding accurate divide can only happen if it is expandable!"); | ||
1093 | BinaryOperator *divOp = | ||
1094 | BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), | ||
1095 | CI->getOperand(1), "fdiv32", CI); | ||
1096 | CI->replaceAllUsesWith(divOp); | ||
1097 | } | ||
1098 | |||
1099 | bool | ||
1100 | AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) | ||
1101 | { | ||
1102 | if (optLevel != CodeGenOpt::None) { | ||
1103 | return false; | ||
1104 | } | ||
1105 | |||
1106 | if (!CI) { | ||
1107 | return false; | ||
1108 | } | ||
1109 | |||
1110 | unsigned funcNameIdx = 0; | ||
1111 | funcNameIdx = CI->getNumOperands() - 1; | ||
1112 | StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); | ||
1113 | if (calleeName != "__amdil_image2d_read_norm" | ||
1114 | && calleeName != "__amdil_image2d_read_unnorm" | ||
1115 | && calleeName != "__amdil_image3d_read_norm" | ||
1116 | && calleeName != "__amdil_image3d_read_unnorm") { | ||
1117 | return false; | ||
1118 | } | ||
1119 | |||
1120 | unsigned samplerIdx = 2; | ||
1121 | samplerIdx = 1; | ||
1122 | Value *sampler = CI->getOperand(samplerIdx); | ||
1123 | LoadInst *lInst = dyn_cast<LoadInst>(sampler); | ||
1124 | if (!lInst) { | ||
1125 | return false; | ||
1126 | } | ||
1127 | |||
1128 | if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { | ||
1129 | return false; | ||
1130 | } | ||
1131 | |||
1132 | GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); | ||
1133 | // If we are loading from what is not a global value, then we | ||
1134 | // fail and return. | ||
1135 | if (!gv) { | ||
1136 | return false; | ||
1137 | } | ||
1138 | |||
1139 | // If we don't have an initializer or we have an initializer and | ||
1140 | // the initializer is not a 32bit integer, we fail. | ||
1141 | if (!gv->hasInitializer() | ||
1142 | || !gv->getInitializer()->getType()->isIntegerTy(32)) { | ||
1143 | return false; | ||
1144 | } | ||
1145 | |||
1146 | // Now that we have the global variable initializer, lets replace | ||
1147 | // all uses of the load instruction with the samplerVal and | ||
1148 | // reparse the __amdil_is_constant() function. | ||
1149 | Constant *samplerVal = gv->getInitializer(); | ||
1150 | lInst->replaceAllUsesWith(samplerVal); | ||
1151 | return true; | ||
1152 | } | ||
1153 | |||
1154 | bool | ||
1155 | AMDGPUPeepholeOpt::doInitialization(Module &M) | ||
1156 | { | ||
1157 | return false; | ||
1158 | } | ||
1159 | |||
1160 | bool | ||
1161 | AMDGPUPeepholeOpt::doFinalization(Module &M) | ||
1162 | { | ||
1163 | return false; | ||
1164 | } | ||
1165 | |||
1166 | void | ||
1167 | AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const | ||
1168 | { | ||
1169 | AU.addRequired<MachineFunctionAnalysis>(); | ||
1170 | FunctionPass::getAnalysisUsage(AU); | ||
1171 | AU.setPreservesAll(); | ||
1172 | } | ||
1173 | |||
1174 | size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { | ||
1175 | size_t size = 0; | ||
1176 | if (!T) { | ||
1177 | return size; | ||
1178 | } | ||
1179 | switch (T->getTypeID()) { | ||
1180 | case Type::X86_FP80TyID: | ||
1181 | case Type::FP128TyID: | ||
1182 | case Type::PPC_FP128TyID: | ||
1183 | case Type::LabelTyID: | ||
1184 | assert(0 && "These types are not supported by this backend"); | ||
1185 | default: | ||
1186 | case Type::FloatTyID: | ||
1187 | case Type::DoubleTyID: | ||
1188 | size = T->getPrimitiveSizeInBits() >> 3; | ||
1189 | break; | ||
1190 | case Type::PointerTyID: | ||
1191 | size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); | ||
1192 | break; | ||
1193 | case Type::IntegerTyID: | ||
1194 | size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); | ||
1195 | break; | ||
1196 | case Type::StructTyID: | ||
1197 | size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); | ||
1198 | break; | ||
1199 | case Type::ArrayTyID: | ||
1200 | size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); | ||
1201 | break; | ||
1202 | case Type::FunctionTyID: | ||
1203 | size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); | ||
1204 | break; | ||
1205 | case Type::VectorTyID: | ||
1206 | size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); | ||
1207 | break; | ||
1208 | }; | ||
1209 | return size; | ||
1210 | } | ||
1211 | |||
1212 | size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, | ||
1213 | bool dereferencePtr) { | ||
1214 | size_t size = 0; | ||
1215 | if (!ST) { | ||
1216 | return size; | ||
1217 | } | ||
1218 | Type *curType; | ||
1219 | StructType::element_iterator eib; | ||
1220 | StructType::element_iterator eie; | ||
1221 | for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { | ||
1222 | curType = *eib; | ||
1223 | size += getTypeSize(curType, dereferencePtr); | ||
1224 | } | ||
1225 | return size; | ||
1226 | } | ||
1227 | |||
1228 | size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, | ||
1229 | bool dereferencePtr) { | ||
1230 | return IT ? (IT->getBitWidth() >> 3) : 0; | ||
1231 | } | ||
1232 | |||
1233 | size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, | ||
1234 | bool dereferencePtr) { | ||
1235 | assert(0 && "Should not be able to calculate the size of an function type"); | ||
1236 | return 0; | ||
1237 | } | ||
1238 | |||
1239 | size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, | ||
1240 | bool dereferencePtr) { | ||
1241 | return (size_t)(AT ? (getTypeSize(AT->getElementType(), | ||
1242 | dereferencePtr) * AT->getNumElements()) | ||
1243 | : 0); | ||
1244 | } | ||
1245 | |||
1246 | size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, | ||
1247 | bool dereferencePtr) { | ||
1248 | return VT ? (VT->getBitWidth() >> 3) : 0; | ||
1249 | } | ||
1250 | |||
1251 | size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, | ||
1252 | bool dereferencePtr) { | ||
1253 | if (!PT) { | ||
1254 | return 0; | ||
1255 | } | ||
1256 | Type *CT = PT->getElementType(); | ||
1257 | if (CT->getTypeID() == Type::StructTyID && | ||
1258 | PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { | ||
1259 | return getTypeSize(dyn_cast<StructType>(CT)); | ||
1260 | } else if (dereferencePtr) { | ||
1261 | size_t size = 0; | ||
1262 | for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { | ||
1263 | size += getTypeSize(PT->getContainedType(x), dereferencePtr); | ||
1264 | } | ||
1265 | return size; | ||
1266 | } else { | ||
1267 | return 4; | ||
1268 | } | ||
1269 | } | ||
1270 | |||
1271 | size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, | ||
1272 | bool dereferencePtr) { | ||
1273 | //assert(0 && "Should not be able to calculate the size of an opaque type"); | ||
1274 | return 4; | ||
1275 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.td b/src/gallium/drivers/radeon/AMDILRegisterInfo.td deleted file mode 100644 index 42235ff37a1..00000000000 --- a/src/gallium/drivers/radeon/AMDILRegisterInfo.td +++ /dev/null | |||
@@ -1,110 +0,0 @@ | |||
1 | //===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Declarations that describe the AMDIL register file | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
// Base class for all AMDIL registers: `num` is the encoded register
// number and `n` the assembly-printed name.
class AMDILReg<bits<16> num, string n> : Register<n> {
  field bits<16> Value;
  let Value = num;
  let Namespace = "AMDGPU";
}

// We will start with 8 registers for each class before expanding to more
// Since the swizzle is added based on the register class, we can leave it
// off here and just specify different registers for different register classes
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;

// All registers between 1000 and 1024 are reserved and cannot be used
// unless commented in this section
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
// r1020 is used to hold the frame index for local arrays
// r1019 is used to hold the dynamic stack allocation pointer
// r1018 is used as a temporary register for handwritten code
// r1017 is used as a temporary register for handwritten code
// r1016 is used as a temporary register for load/store code
// r1015 is used as a temporary register for data segment offset
// r1014 is used as a temporary register for store code
// r1013 is used as the section data pointer register
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
// r1009 is used as the frame pointer register
// r999 is used as the mem register.
// r998 is used as the return address register.
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
// 16 bit integer register class; the AltOrders clause restricts the
// preferred allocation order to the general registers R1-R20.
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
  (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
    let AltOrders = [(add (sequence "R%u", 1, 20))];
    let AltOrderSelect = [{
      return 1;
    }];
  }
// 32 bit integer register class; same membership and allocation order
// as GPRI16.
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
  (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
    let AltOrders = [(add (sequence "R%u", 1, 20))];
    let AltOrderSelect = [{
      return 1;
    }];
  }
// 32 bit float register class; same membership and allocation order as
// the integer classes above.
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
  (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
{
    let AltOrders = [(add (sequence "R%u", 1, 20))];
    let AltOrderSelect = [{
      return 1;
    }];
  }
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp deleted file mode 100644 index 856b00f894a..00000000000 --- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | //===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | #include "AMDILSIDevice.h" | ||
10 | #include "AMDILEvergreenDevice.h" | ||
11 | #include "AMDILNIDevice.h" | ||
12 | #include "AMDGPUSubtarget.h" | ||
13 | |||
14 | using namespace llvm; | ||
15 | |||
16 | AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST) | ||
17 | : AMDGPUEvergreenDevice(ST) | ||
18 | { | ||
19 | } | ||
20 | AMDGPUSIDevice::~AMDGPUSIDevice() | ||
21 | { | ||
22 | } | ||
23 | |||
24 | size_t | ||
25 | AMDGPUSIDevice::getMaxLDSSize() const | ||
26 | { | ||
27 | if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { | ||
28 | return MAX_LDS_SIZE_900; | ||
29 | } else { | ||
30 | return 0; | ||
31 | } | ||
32 | } | ||
33 | |||
34 | uint32_t | ||
35 | AMDGPUSIDevice::getGeneration() const | ||
36 | { | ||
37 | return AMDGPUDeviceInfo::HD7XXX; | ||
38 | } | ||
39 | |||
40 | std::string | ||
41 | AMDGPUSIDevice::getDataLayout() const | ||
42 | { | ||
43 | return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" | ||
44 | "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" | ||
45 | "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" | ||
46 | "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" | ||
47 | "-v512:512:512-v1024:1024:1024-v2048:2048:2048" | ||
48 | "-n8:16:32:64"); | ||
49 | } | ||
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h deleted file mode 100644 index 6a684cb6095..00000000000 --- a/src/gallium/drivers/radeon/AMDILSIDevice.h +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | //===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface for the subtarget data classes. | ||
11 | // | ||
12 | //===---------------------------------------------------------------------===// | ||
13 | // This file will define the interface that each generation needs to | ||
14 | // implement in order to correctly answer queries on the capabilities of the | ||
15 | // specific hardware. | ||
16 | //===---------------------------------------------------------------------===// | ||
17 | #ifndef _AMDILSIDEVICE_H_ | ||
18 | #define _AMDILSIDEVICE_H_ | ||
19 | #include "AMDILEvergreenDevice.h" | ||
20 | #include "AMDGPUSubtarget.h" | ||
21 | |||
22 | namespace llvm { | ||
23 | class AMDGPUSubtarget; | ||
24 | //===---------------------------------------------------------------------===// | ||
25 | // SI generation of devices and their respective sub classes | ||
26 | //===---------------------------------------------------------------------===// | ||
27 | |||
28 | // The AMDGPUSIDevice is the base class for all Northern Island series of | ||
29 | // cards. It is very similiar to the AMDGPUEvergreenDevice, with the major | ||
30 | // exception being differences in wavefront size and hardware capabilities. The | ||
31 | // SI devices are all 64 wide wavefronts and also add support for signed 24 bit | ||
32 | // integer operations | ||
33 | |||
34 | class AMDGPUSIDevice : public AMDGPUEvergreenDevice { | ||
35 | public: | ||
36 | AMDGPUSIDevice(AMDGPUSubtarget*); | ||
37 | virtual ~AMDGPUSIDevice(); | ||
38 | virtual size_t getMaxLDSSize() const; | ||
39 | virtual uint32_t getGeneration() const; | ||
40 | virtual std::string getDataLayout() const; | ||
41 | protected: | ||
42 | }; // AMDGPUSIDevice | ||
43 | |||
44 | } // namespace llvm | ||
45 | #endif // _AMDILSIDEVICE_H_ | ||
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h deleted file mode 100644 index e6666f97705..00000000000 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
1 | //===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //==-----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file provides helper macros for expanding case statements. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | #ifndef AMDILUTILITYFUNCTIONS_H_ | ||
14 | #define AMDILUTILITYFUNCTIONS_H_ | ||
15 | |||
16 | // Macros that are used to help with switch statements for various data types | ||
17 | // However, these macro's do not return anything unlike the second set below. | ||
18 | #define ExpandCaseTo32bitIntTypes(Instr) \ | ||
19 | case Instr##_i32: | ||
20 | |||
21 | #define ExpandCaseTo32bitIntTruncTypes(Instr) \ | ||
22 | case Instr##_i32i8: \ | ||
23 | case Instr##_i32i16: | ||
24 | |||
25 | #define ExpandCaseToIntTypes(Instr) \ | ||
26 | ExpandCaseTo32bitIntTypes(Instr) | ||
27 | |||
28 | #define ExpandCaseToIntTruncTypes(Instr) \ | ||
29 | ExpandCaseTo32bitIntTruncTypes(Instr) | ||
30 | |||
31 | #define ExpandCaseToFloatTypes(Instr) \ | ||
32 | case Instr##_f32: | ||
33 | |||
34 | #define ExpandCaseTo32bitScalarTypes(Instr) \ | ||
35 | ExpandCaseTo32bitIntTypes(Instr) \ | ||
36 | case Instr##_f32: | ||
37 | |||
38 | #define ExpandCaseToAllScalarTypes(Instr) \ | ||
39 | ExpandCaseToFloatTypes(Instr) \ | ||
40 | ExpandCaseToIntTypes(Instr) | ||
41 | |||
42 | #define ExpandCaseToAllScalarTruncTypes(Instr) \ | ||
43 | ExpandCaseToFloatTruncTypes(Instr) \ | ||
44 | ExpandCaseToIntTruncTypes(Instr) | ||
45 | |||
46 | #define ExpandCaseToAllTypes(Instr) \ | ||
47 | ExpandCaseToAllScalarTypes(Instr) | ||
48 | |||
49 | #define ExpandCaseToAllTruncTypes(Instr) \ | ||
50 | ExpandCaseToAllScalarTruncTypes(Instr) | ||
51 | |||
52 | // Macros that expand into statements with return values | ||
53 | #define ExpandCaseTo32bitIntReturn(Instr, Return) \ | ||
54 | case Instr##_i32: return Return##_i32; | ||
55 | |||
56 | #define ExpandCaseToIntReturn(Instr, Return) \ | ||
57 | ExpandCaseTo32bitIntReturn(Instr, Return) | ||
58 | |||
59 | #define ExpandCaseToFloatReturn(Instr, Return) \ | ||
60 | case Instr##_f32: return Return##_f32;\ | ||
61 | |||
62 | #define ExpandCaseToAllScalarReturn(Instr, Return) \ | ||
63 | ExpandCaseToFloatReturn(Instr, Return) \ | ||
64 | ExpandCaseToIntReturn(Instr, Return) | ||
65 | |||
66 | // These macros expand to common groupings of RegClass ID's | ||
67 | #define ExpandCaseTo1CompRegID \ | ||
68 | case AMDGPU::GPRI32RegClassID: \ | ||
69 | case AMDGPU::GPRF32RegClassID: | ||
70 | |||
71 | #define ExpandCaseTo32BitType(Instr) \ | ||
72 | case Instr##_i32: \ | ||
73 | case Instr##_f32: | ||
74 | |||
75 | #endif // AMDILUTILITYFUNCTIONS_H_ | ||
diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp deleted file mode 100644 index b6ab9b22fb1..00000000000 --- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.cpp +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | |||
2 | #include "AMDGPUInstPrinter.h" | ||
3 | #include "llvm/MC/MCInst.h" | ||
4 | |||
5 | using namespace llvm; | ||
6 | |||
7 | void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, | ||
8 | StringRef Annot) { | ||
9 | printInstruction(MI, OS); | ||
10 | |||
11 | printAnnotation(OS, Annot); | ||
12 | } | ||
13 | |||
14 | void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, | ||
15 | raw_ostream &O) { | ||
16 | |||
17 | const MCOperand &Op = MI->getOperand(OpNo); | ||
18 | if (Op.isReg()) { | ||
19 | O << getRegisterName(Op.getReg()); | ||
20 | } else if (Op.isImm()) { | ||
21 | O << Op.getImm(); | ||
22 | } else if (Op.isFPImm()) { | ||
23 | O << Op.getFPImm(); | ||
24 | } else { | ||
25 | assert(!"unknown operand type in printOperand"); | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, | ||
30 | raw_ostream &O) { | ||
31 | printOperand(MI, OpNo, O); | ||
32 | } | ||
33 | |||
34 | #include "AMDGPUGenAsmWriter.inc" | ||
diff --git a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h b/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h deleted file mode 100644 index 62c1a5ee04f..00000000000 --- a/src/gallium/drivers/radeon/InstPrinter/AMDGPUInstPrinter.h +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | |||
2 | #ifndef AMDGPUINSTPRINTER_H | ||
3 | #define AMDGPUINSTPRINTER_H | ||
4 | |||
5 | #include "llvm/ADT/StringRef.h" | ||
6 | #include "llvm/MC/MCInstPrinter.h" | ||
7 | #include "llvm/Support/raw_ostream.h" | ||
8 | |||
9 | namespace llvm { | ||
10 | |||
11 | class AMDGPUInstPrinter : public MCInstPrinter { | ||
12 | public: | ||
13 | AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, | ||
14 | const MCRegisterInfo &MRI) | ||
15 | : MCInstPrinter(MAI, MII, MRI) {} | ||
16 | |||
17 | //Autogenerated by tblgen | ||
18 | void printInstruction(const MCInst *MI, raw_ostream &O); | ||
19 | static const char *getRegisterName(unsigned RegNo); | ||
20 | |||
21 | // virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; | ||
22 | virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); | ||
23 | |||
24 | private: | ||
25 | void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); | ||
26 | // void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O); | ||
27 | void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); | ||
28 | |||
29 | |||
30 | }; | ||
31 | |||
32 | } // End namespace llvm | ||
33 | |||
34 | #endif // AMDGPUINSTRPRINTER_H | ||
diff --git a/src/gallium/drivers/radeon/LICENSE.TXT b/src/gallium/drivers/radeon/LICENSE.TXT deleted file mode 100644 index a57de2e87a1..00000000000 --- a/src/gallium/drivers/radeon/LICENSE.TXT +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | ============================================================================== | ||
2 | LLVM Release License | ||
3 | ============================================================================== | ||
4 | University of Illinois/NCSA | ||
5 | Open Source License | ||
6 | |||
7 | Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. | ||
8 | All rights reserved. | ||
9 | |||
10 | Developed by: | ||
11 | |||
12 | LLVM Team | ||
13 | |||
14 | University of Illinois at Urbana-Champaign | ||
15 | |||
16 | http://llvm.org | ||
17 | |||
18 | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
19 | this software and associated documentation files (the "Software"), to deal with | ||
20 | the Software without restriction, including without limitation the rights to | ||
21 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||
22 | of the Software, and to permit persons to whom the Software is furnished to do | ||
23 | so, subject to the following conditions: | ||
24 | |||
25 | * Redistributions of source code must retain the above copyright notice, | ||
26 | this list of conditions and the following disclaimers. | ||
27 | |||
28 | * Redistributions in binary form must reproduce the above copyright notice, | ||
29 | this list of conditions and the following disclaimers in the | ||
30 | documentation and/or other materials provided with the distribution. | ||
31 | |||
32 | * Neither the names of the LLVM Team, University of Illinois at | ||
33 | Urbana-Champaign, nor the names of its contributors may be used to | ||
34 | endorse or promote products derived from this Software without specific | ||
35 | prior written permission. | ||
36 | |||
37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
39 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
40 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE | ||
43 | SOFTWARE. | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp deleted file mode 100644 index 5b31bc6c8f0..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp +++ /dev/null | |||
@@ -1,80 +0,0 @@ | |||
1 | //===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | ||
11 | #include "llvm/ADT/StringRef.h" | ||
12 | #include "llvm/MC/MCAsmBackend.h" | ||
13 | #include "llvm/MC/MCAssembler.h" | ||
14 | #include "llvm/MC/MCObjectWriter.h" | ||
15 | #include "llvm/MC/MCValue.h" | ||
16 | #include "llvm/Support/TargetRegistry.h" | ||
17 | |||
18 | using namespace llvm; | ||
19 | |||
20 | namespace { | ||
21 | |||
22 | class AMDGPUMCObjectWriter : public MCObjectWriter { | ||
23 | public: | ||
24 | AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } | ||
25 | virtual void ExecutePostLayoutBinding(MCAssembler &Asm, | ||
26 | const MCAsmLayout &Layout) { | ||
27 | //XXX: Implement if necessary. | ||
28 | } | ||
29 | virtual void RecordRelocation(const MCAssembler &Asm, | ||
30 | const MCAsmLayout &Layout, | ||
31 | const MCFragment *Fragment, | ||
32 | const MCFixup &Fixup, | ||
33 | MCValue Target, uint64_t &FixedValue) { | ||
34 | assert(!"Not implemented"); | ||
35 | } | ||
36 | |||
37 | virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); | ||
38 | |||
39 | }; | ||
40 | |||
41 | class AMDGPUAsmBackend : public MCAsmBackend { | ||
42 | public: | ||
43 | AMDGPUAsmBackend(const Target &T) | ||
44 | : MCAsmBackend() {} | ||
45 | |||
46 | virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; | ||
47 | virtual unsigned getNumFixupKinds() const { return 0; }; | ||
48 | virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, | ||
49 | uint64_t Value) const { assert(!"Not implemented"); } | ||
50 | virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, | ||
51 | const MCInstFragment *DF, | ||
52 | const MCAsmLayout &Layout) const { | ||
53 | return false; | ||
54 | } | ||
55 | virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const { | ||
56 | assert(!"Not implemented"); | ||
57 | } | ||
58 | virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; } | ||
59 | virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { | ||
60 | return true; | ||
61 | } | ||
62 | }; | ||
63 | |||
64 | } //End anonymous namespace | ||
65 | |||
66 | void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, | ||
67 | const MCAsmLayout &Layout) { | ||
68 | for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { | ||
69 | Asm.writeSectionData(I, Layout); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) { | ||
74 | return new AMDGPUAsmBackend(T); | ||
75 | } | ||
76 | |||
77 | AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( | ||
78 | raw_ostream &OS) const { | ||
79 | return new AMDGPUMCObjectWriter(OS); | ||
80 | } | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp deleted file mode 100644 index ccd5a3bfaa9..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ /dev/null | |||
@@ -1,96 +0,0 @@ | |||
1 | //===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | #include "AMDGPUMCAsmInfo.h" | ||
11 | |||
12 | using namespace llvm; | ||
13 | AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() | ||
14 | { | ||
15 | HasSingleParameterDotFile = false; | ||
16 | WeakDefDirective = 0; | ||
17 | //===------------------------------------------------------------------===// | ||
18 | HasSubsectionsViaSymbols = true; | ||
19 | HasMachoZeroFillDirective = false; | ||
20 | HasMachoTBSSDirective = false; | ||
21 | HasStaticCtorDtorReferenceInStaticMode = false; | ||
22 | LinkerRequiresNonEmptyDwarfLines = true; | ||
23 | MaxInstLength = 16; | ||
24 | PCSymbol = "$"; | ||
25 | SeparatorString = "\n"; | ||
26 | CommentColumn = 40; | ||
27 | CommentString = ";"; | ||
28 | LabelSuffix = ":"; | ||
29 | GlobalPrefix = "@"; | ||
30 | PrivateGlobalPrefix = ";."; | ||
31 | LinkerPrivateGlobalPrefix = "!"; | ||
32 | InlineAsmStart = ";#ASMSTART"; | ||
33 | InlineAsmEnd = ";#ASMEND"; | ||
34 | AssemblerDialect = 0; | ||
35 | AllowQuotesInName = false; | ||
36 | AllowNameToStartWithDigit = false; | ||
37 | AllowPeriodsInName = false; | ||
38 | |||
39 | //===--- Data Emission Directives -------------------------------------===// | ||
40 | ZeroDirective = ".zero"; | ||
41 | AsciiDirective = ".ascii\t"; | ||
42 | AscizDirective = ".asciz\t"; | ||
43 | Data8bitsDirective = ".byte\t"; | ||
44 | Data16bitsDirective = ".short\t"; | ||
45 | Data32bitsDirective = ".long\t"; | ||
46 | Data64bitsDirective = ".quad\t"; | ||
47 | GPRel32Directive = 0; | ||
48 | SunStyleELFSectionSwitchSyntax = true; | ||
49 | UsesELFSectionDirectiveForBSS = true; | ||
50 | HasMicrosoftFastStdCallMangling = false; | ||
51 | |||
52 | //===--- Alignment Information ----------------------------------------===// | ||
53 | AlignDirective = ".align\t"; | ||
54 | AlignmentIsInBytes = true; | ||
55 | TextAlignFillValue = 0; | ||
56 | |||
57 | //===--- Global Variable Emission Directives --------------------------===// | ||
58 | GlobalDirective = ".global"; | ||
59 | ExternDirective = ".extern"; | ||
60 | HasSetDirective = false; | ||
61 | HasAggressiveSymbolFolding = true; | ||
62 | LCOMMDirectiveType = LCOMM::None; | ||
63 | COMMDirectiveAlignmentIsInBytes = false; | ||
64 | HasDotTypeDotSizeDirective = false; | ||
65 | HasNoDeadStrip = true; | ||
66 | HasSymbolResolver = false; | ||
67 | WeakRefDirective = ".weakref\t"; | ||
68 | LinkOnceDirective = 0; | ||
69 | //===--- Dwarf Emission Directives -----------------------------------===// | ||
70 | HasLEB128 = true; | ||
71 | SupportsDebugInformation = true; | ||
72 | ExceptionsType = ExceptionHandling::None; | ||
73 | DwarfUsesInlineInfoSection = false; | ||
74 | DwarfSectionOffsetDirective = ".offset"; | ||
75 | DwarfUsesLabelOffsetForRanges = true; | ||
76 | |||
77 | //===--- CBE Asm Translation Table -----------------------------------===// | ||
78 | AsmTransCBE = 0; | ||
79 | } | ||
80 | const char* | ||
81 | AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const | ||
82 | { | ||
83 | switch (AS) { | ||
84 | default: | ||
85 | return 0; | ||
86 | case 0: | ||
87 | return 0; | ||
88 | }; | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | const MCSection* | ||
93 | AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h deleted file mode 100644 index 0ca264beb0f..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCAsmInfo.h +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | //===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // TODO: Add full description | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef AMDGPUMCASMINFO_H_ | ||
15 | #define AMDGPUMCASMINFO_H_ | ||
16 | |||
17 | #include "llvm/MC/MCAsmInfo.h" | ||
18 | namespace llvm { | ||
19 | class Target; | ||
20 | class StringRef; | ||
21 | |||
22 | class AMDGPUMCAsmInfo : public MCAsmInfo { | ||
23 | public: | ||
24 | explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT); | ||
25 | const char* | ||
26 | getDataASDirective(unsigned int Size, unsigned int AS) const; | ||
27 | const MCSection* getNonexecutableStackSection(MCContext &CTX) const; | ||
28 | }; | ||
29 | } // namespace llvm | ||
30 | #endif // AMDGPUMCASMINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h deleted file mode 100644 index a75a8414e6c..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | //===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // CodeEmitter interface for R600 and SI codegen. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef AMDGPUCODEEMITTER_H | ||
15 | #define AMDGPUCODEEMITTER_H | ||
16 | |||
17 | #include "llvm/MC/MCCodeEmitter.h" | ||
18 | #include "llvm/Support/raw_ostream.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | |||
22 | class MCInst; | ||
23 | class MCOperand; | ||
24 | |||
25 | class AMDGPUMCCodeEmitter : public MCCodeEmitter { | ||
26 | public: | ||
27 | |||
28 | uint64_t getBinaryCodeForInstr(const MCInst &MI, | ||
29 | SmallVectorImpl<MCFixup> &Fixups) const; | ||
30 | |||
31 | virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, | ||
32 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, | ||
37 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
38 | return 0; | ||
39 | } | ||
40 | virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, | ||
41 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
42 | return 0; | ||
43 | } | ||
44 | virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const { | ||
45 | return Value; | ||
46 | } | ||
47 | virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, | ||
48 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
49 | return 0; | ||
50 | } | ||
51 | virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, | ||
52 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
53 | return 0; | ||
54 | } | ||
55 | }; | ||
56 | |||
57 | } // End namespace llvm | ||
58 | |||
59 | #endif // AMDGPUCODEEMITTER_H | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp deleted file mode 100644 index 6de20fceda6..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ /dev/null | |||
@@ -1,111 +0,0 @@ | |||
1 | //===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file provides AMDGPU specific target descriptions. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPUMCTargetDesc.h" | ||
15 | #include "AMDGPUMCAsmInfo.h" | ||
16 | #include "InstPrinter/AMDGPUInstPrinter.h" | ||
17 | #include "llvm/MC/MachineLocation.h" | ||
18 | #include "llvm/MC/MCCodeGenInfo.h" | ||
19 | #include "llvm/MC/MCInstrInfo.h" | ||
20 | #include "llvm/MC/MCRegisterInfo.h" | ||
21 | #include "llvm/MC/MCStreamer.h" | ||
22 | #include "llvm/MC/MCSubtargetInfo.h" | ||
23 | #include "llvm/Support/ErrorHandling.h" | ||
24 | #include "llvm/Support/TargetRegistry.h" | ||
25 | |||
26 | #define GET_INSTRINFO_MC_DESC | ||
27 | #include "AMDGPUGenInstrInfo.inc" | ||
28 | |||
29 | #define GET_SUBTARGETINFO_MC_DESC | ||
30 | #include "AMDGPUGenSubtargetInfo.inc" | ||
31 | |||
32 | #define GET_REGINFO_MC_DESC | ||
33 | #include "AMDGPUGenRegisterInfo.inc" | ||
34 | |||
35 | using namespace llvm; | ||
36 | |||
37 | static MCInstrInfo *createAMDGPUMCInstrInfo() { | ||
38 | MCInstrInfo *X = new MCInstrInfo(); | ||
39 | InitAMDGPUMCInstrInfo(X); | ||
40 | return X; | ||
41 | } | ||
42 | |||
43 | static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { | ||
44 | MCRegisterInfo *X = new MCRegisterInfo(); | ||
45 | InitAMDGPUMCRegisterInfo(X, 0); | ||
46 | return X; | ||
47 | } | ||
48 | |||
49 | static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU, | ||
50 | StringRef FS) { | ||
51 | MCSubtargetInfo * X = new MCSubtargetInfo(); | ||
52 | InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS); | ||
53 | return X; | ||
54 | } | ||
55 | |||
56 | static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, | ||
57 | CodeModel::Model CM, | ||
58 | CodeGenOpt::Level OL) { | ||
59 | MCCodeGenInfo *X = new MCCodeGenInfo(); | ||
60 | X->InitMCCodeGenInfo(RM, CM, OL); | ||
61 | return X; | ||
62 | } | ||
63 | |||
64 | static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, | ||
65 | unsigned SyntaxVariant, | ||
66 | const MCAsmInfo &MAI, | ||
67 | const MCInstrInfo &MII, | ||
68 | const MCRegisterInfo &MRI, | ||
69 | const MCSubtargetInfo &STI) { | ||
70 | return new AMDGPUInstPrinter(MAI, MII, MRI); | ||
71 | } | ||
72 | |||
73 | static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, | ||
74 | const MCSubtargetInfo &STI, | ||
75 | MCContext &Ctx) { | ||
76 | if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { | ||
77 | return createSIMCCodeEmitter(MCII, STI, Ctx); | ||
78 | } else { | ||
79 | return createR600MCCodeEmitter(MCII, STI, Ctx); | ||
80 | } | ||
81 | } | ||
82 | |||
83 | static MCStreamer *createMCStreamer(const Target &T, StringRef TT, | ||
84 | MCContext &Ctx, MCAsmBackend &MAB, | ||
85 | raw_ostream &_OS, | ||
86 | MCCodeEmitter *_Emitter, | ||
87 | bool RelaxAll, | ||
88 | bool NoExecStack) { | ||
89 | return createPureStreamer(Ctx, MAB, _OS, _Emitter); | ||
90 | } | ||
91 | |||
92 | extern "C" void LLVMInitializeAMDGPUTargetMC() { | ||
93 | |||
94 | RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget); | ||
95 | |||
96 | TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo); | ||
97 | |||
98 | TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo); | ||
99 | |||
100 | TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo); | ||
101 | |||
102 | TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo); | ||
103 | |||
104 | TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter); | ||
105 | |||
106 | TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter); | ||
107 | |||
108 | TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend); | ||
109 | |||
110 | TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer); | ||
111 | } | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h deleted file mode 100644 index 328e367401a..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCTargetDesc.h +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | //===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file provides AMDGPU specific target descriptions. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | // | ||
14 | |||
15 | #ifndef AMDGPUMCTARGETDESC_H | ||
16 | #define AMDGPUMCTARGETDESC_H | ||
17 | |||
18 | #include "llvm/ADT/StringRef.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | class MCAsmBackend; | ||
22 | class MCCodeEmitter; | ||
23 | class MCContext; | ||
24 | class MCInstrInfo; | ||
25 | class MCRegisterInfo; | ||
26 | class MCSubtargetInfo; | ||
27 | class Target; | ||
28 | |||
29 | extern Target TheAMDGPUTarget; | ||
30 | |||
31 | MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, | ||
32 | const MCSubtargetInfo &STI, | ||
33 | MCContext &Ctx); | ||
34 | |||
35 | MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, | ||
36 | const MCSubtargetInfo &STI, | ||
37 | MCContext &Ctx); | ||
38 | |||
39 | MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT); | ||
40 | } // End llvm namespace | ||
41 | |||
42 | #define GET_REGINFO_ENUM | ||
43 | #include "AMDGPUGenRegisterInfo.inc" | ||
44 | |||
45 | #define GET_INSTRINFO_ENUM | ||
46 | #include "AMDGPUGenInstrInfo.inc" | ||
47 | |||
48 | #define GET_SUBTARGETINFO_ENUM | ||
49 | #include "AMDGPUGenSubtargetInfo.inc" | ||
50 | |||
51 | #endif // AMDGPUMCTARGETDESC_H | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp deleted file mode 100644 index a11f48234cb..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp +++ /dev/null | |||
@@ -1,727 +0,0 @@ | |||
1 | //===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This code emitters outputs bytecode that is understood by the r600g driver | ||
11 | // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, | ||
12 | // except that the size of the instruction fields are rounded up to the | ||
13 | // nearest byte. | ||
14 | // | ||
15 | // [1] http://www.mesa3d.org/ | ||
16 | // | ||
17 | //===----------------------------------------------------------------------===// | ||
18 | |||
19 | #include "R600Defines.h" | ||
20 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | ||
21 | #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" | ||
22 | #include "llvm/MC/MCCodeEmitter.h" | ||
23 | #include "llvm/MC/MCContext.h" | ||
24 | #include "llvm/MC/MCInst.h" | ||
25 | #include "llvm/MC/MCInstrInfo.h" | ||
26 | #include "llvm/MC/MCRegisterInfo.h" | ||
27 | #include "llvm/MC/MCSubtargetInfo.h" | ||
28 | #include "llvm/Support/raw_ostream.h" | ||
29 | |||
30 | #include <stdio.h> | ||
31 | |||
32 | #define SRC_BYTE_COUNT 11 | ||
33 | #define DST_BYTE_COUNT 5 | ||
34 | |||
35 | using namespace llvm; | ||
36 | |||
37 | namespace { | ||
38 | |||
39 | class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { | ||
40 | R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT | ||
41 | void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT | ||
42 | const MCInstrInfo &MCII; | ||
43 | const MCSubtargetInfo &STI; | ||
44 | MCContext &Ctx; | ||
45 | |||
46 | public: | ||
47 | |||
48 | R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, | ||
49 | MCContext &ctx) | ||
50 | : MCII(mcii), STI(sti), Ctx(ctx) { } | ||
51 | |||
52 | /// EncodeInstruction - Encode the instruction and write it to the OS. | ||
53 | virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, | ||
54 | SmallVectorImpl<MCFixup> &Fixups) const; | ||
55 | |||
56 | /// getMachineOpValue - Reutrn the encoding for an MCOperand. | ||
57 | virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, | ||
58 | SmallVectorImpl<MCFixup> &Fixups) const; | ||
59 | private: | ||
60 | |||
61 | void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, | ||
62 | raw_ostream &OS) const; | ||
63 | void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; | ||
64 | void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, | ||
65 | raw_ostream &OS) const; | ||
66 | void EmitDst(const MCInst &MI, raw_ostream &OS) const; | ||
67 | void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, | ||
68 | raw_ostream &OS) const; | ||
69 | void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; | ||
70 | |||
71 | void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; | ||
72 | |||
73 | void EmitByte(unsigned int byte, raw_ostream &OS) const; | ||
74 | |||
75 | void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; | ||
76 | |||
77 | void Emit(uint32_t value, raw_ostream &OS) const; | ||
78 | void Emit(uint64_t value, raw_ostream &OS) const; | ||
79 | |||
80 | unsigned getHWRegIndex(unsigned reg) const; | ||
81 | unsigned getHWRegChan(unsigned reg) const; | ||
82 | unsigned getHWReg(unsigned regNo) const; | ||
83 | |||
84 | bool isFCOp(unsigned opcode) const; | ||
85 | bool isTexOp(unsigned opcode) const; | ||
86 | bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; | ||
87 | |||
88 | /// getHWRegIndexGen - Get the register's hardware index. Implemented in | ||
89 | /// R600HwRegInfo.include. | ||
90 | unsigned getHWRegIndexGen(unsigned int Reg) const; | ||
91 | |||
92 | /// getHWRegChanGen - Get the register's channel. Implemented in | ||
93 | /// R600HwRegInfo.include. | ||
94 | unsigned getHWRegChanGen(unsigned int Reg) const; | ||
95 | }; | ||
96 | |||
97 | } // End anonymous namespace | ||
98 | |||
99 | enum RegElement { | ||
100 | ELEMENT_X = 0, | ||
101 | ELEMENT_Y, | ||
102 | ELEMENT_Z, | ||
103 | ELEMENT_W | ||
104 | }; | ||
105 | |||
106 | enum InstrTypes { | ||
107 | INSTR_ALU = 0, | ||
108 | INSTR_TEX, | ||
109 | INSTR_FC, | ||
110 | INSTR_NATIVE, | ||
111 | INSTR_VTX | ||
112 | }; | ||
113 | |||
114 | enum FCInstr { | ||
115 | FC_IF = 0, | ||
116 | FC_IF_INT, | ||
117 | FC_ELSE, | ||
118 | FC_ENDIF, | ||
119 | FC_BGNLOOP, | ||
120 | FC_ENDLOOP, | ||
121 | FC_BREAK, | ||
122 | FC_BREAK_NZ_INT, | ||
123 | FC_CONTINUE, | ||
124 | FC_BREAK_Z_INT, | ||
125 | FC_BREAK_NZ | ||
126 | }; | ||
127 | |||
128 | enum TextureTypes { | ||
129 | TEXTURE_1D = 1, | ||
130 | TEXTURE_2D, | ||
131 | TEXTURE_3D, | ||
132 | TEXTURE_CUBE, | ||
133 | TEXTURE_RECT, | ||
134 | TEXTURE_SHADOW1D, | ||
135 | TEXTURE_SHADOW2D, | ||
136 | TEXTURE_SHADOWRECT, | ||
137 | TEXTURE_1D_ARRAY, | ||
138 | TEXTURE_2D_ARRAY, | ||
139 | TEXTURE_SHADOW1D_ARRAY, | ||
140 | TEXTURE_SHADOW2D_ARRAY | ||
141 | }; | ||
142 | |||
143 | MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, | ||
144 | const MCSubtargetInfo &STI, | ||
145 | MCContext &Ctx) { | ||
146 | return new R600MCCodeEmitter(MCII, STI, Ctx); | ||
147 | } | ||
148 | |||
149 | void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, | ||
150 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
151 | if (isTexOp(MI.getOpcode())) { | ||
152 | EmitTexInstr(MI, Fixups, OS); | ||
153 | } else if (isFCOp(MI.getOpcode())){ | ||
154 | EmitFCInstr(MI, OS); | ||
155 | } else if (MI.getOpcode() == AMDGPU::RETURN || | ||
156 | MI.getOpcode() == AMDGPU::BUNDLE || | ||
157 | MI.getOpcode() == AMDGPU::KILL) { | ||
158 | return; | ||
159 | } else { | ||
160 | switch(MI.getOpcode()) { | ||
161 | case AMDGPU::RAT_WRITE_CACHELESS_32_eg: | ||
162 | case AMDGPU::RAT_WRITE_CACHELESS_128_eg: | ||
163 | { | ||
164 | uint64_t inst = getBinaryCodeForInstr(MI, Fixups); | ||
165 | EmitByte(INSTR_NATIVE, OS); | ||
166 | Emit(inst, OS); | ||
167 | break; | ||
168 | } | ||
169 | case AMDGPU::CONSTANT_LOAD_eg: | ||
170 | case AMDGPU::VTX_READ_PARAM_i32_eg: | ||
171 | case AMDGPU::VTX_READ_PARAM_f32_eg: | ||
172 | case AMDGPU::VTX_READ_GLOBAL_i8_eg: | ||
173 | case AMDGPU::VTX_READ_GLOBAL_i32_eg: | ||
174 | case AMDGPU::VTX_READ_GLOBAL_f32_eg: | ||
175 | case AMDGPU::VTX_READ_GLOBAL_v4i32_eg: | ||
176 | case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: | ||
177 | { | ||
178 | uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); | ||
179 | uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset | ||
180 | |||
181 | EmitByte(INSTR_VTX, OS); | ||
182 | Emit(InstWord01, OS); | ||
183 | Emit(InstWord2, OS); | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | default: | ||
188 | EmitALUInstr(MI, Fixups, OS); | ||
189 | break; | ||
190 | } | ||
191 | } | ||
192 | } | ||
193 | |||
194 | void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, | ||
195 | SmallVectorImpl<MCFixup> &Fixups, | ||
196 | raw_ostream &OS) const { | ||
197 | const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); | ||
198 | unsigned NumOperands = MI.getNumOperands(); | ||
199 | |||
200 | if(MCDesc.findFirstPredOperandIdx() > -1) | ||
201 | NumOperands--; | ||
202 | |||
203 | if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0) | ||
204 | NumOperands--; | ||
205 | |||
206 | if(MI.getOpcode() == AMDGPU::PRED_X) | ||
207 | NumOperands = 2; | ||
208 | |||
209 | // XXX Check if instruction writes a result | ||
210 | if (NumOperands < 1) { | ||
211 | return; | ||
212 | } | ||
213 | |||
214 | // Emit instruction type | ||
215 | EmitByte(INSTR_ALU, OS); | ||
216 | |||
217 | uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); | ||
218 | |||
219 | //older alu have different encoding for instructions with one or two src | ||
220 | //parameters. | ||
221 | if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && | ||
222 | !(MCDesc.TSFlags & R600_InstFlag::OP3)) { | ||
223 | uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); | ||
224 | InstWord01 &= ~(0x3FFULL << 39); | ||
225 | InstWord01 |= ISAOpCode << 1; | ||
226 | } | ||
227 | |||
228 | unsigned int OpIndex; | ||
229 | for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) { | ||
230 | // Literal constants are always stored as the last operand. | ||
231 | if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) { | ||
232 | break; | ||
233 | } | ||
234 | EmitSrcISA(MI, OpIndex, InstWord01, OS); | ||
235 | } | ||
236 | |||
237 | // Emit zeros for unused sources | ||
238 | for ( ; OpIndex < 4; OpIndex++) { | ||
239 | EmitNullBytes(SRC_BYTE_COUNT - 6, OS); | ||
240 | } | ||
241 | |||
242 | // Emit destination register | ||
243 | const MCOperand &dstOp = MI.getOperand(0); | ||
244 | if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) { | ||
245 | //element of destination register | ||
246 | InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61; | ||
247 | |||
248 | // isClamped | ||
249 | if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { | ||
250 | InstWord01 |= 1ULL << 63; | ||
251 | } | ||
252 | |||
253 | // write mask | ||
254 | if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) { | ||
255 | InstWord01 |= 1ULL << 36; | ||
256 | } | ||
257 | |||
258 | // XXX: Emit relative addressing mode | ||
259 | } | ||
260 | |||
261 | // Emit ALU | ||
262 | |||
263 | // Emit IsLast (for this instruction group) (1 byte) | ||
264 | if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { | ||
265 | InstWord01 |= 1ULL << 31; | ||
266 | } | ||
267 | |||
268 | // XXX: Emit push modifier | ||
269 | if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { | ||
270 | InstWord01 |= 1ULL << 34; | ||
271 | } | ||
272 | |||
273 | // XXX: Emit predicate (1 byte) | ||
274 | int PredIdx = MCDesc.findFirstPredOperandIdx(); | ||
275 | if (PredIdx != -1) { | ||
276 | switch(MI.getOperand(PredIdx).getReg()) { | ||
277 | case AMDGPU::PRED_SEL_ZERO: | ||
278 | InstWord01 |= 2ULL << 29; | ||
279 | break; | ||
280 | case AMDGPU::PRED_SEL_ONE: | ||
281 | InstWord01 |= 3ULL << 29; | ||
282 | break; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | //XXX: predicate | ||
287 | //XXX: bank swizzle | ||
288 | //XXX: OMOD | ||
289 | //XXX: index mode | ||
290 | |||
291 | Emit(InstWord01, OS); | ||
292 | } | ||
293 | |||
294 | void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, | ||
295 | raw_ostream &OS) const { | ||
296 | const MCOperand &MO = MI.getOperand(OpIdx); | ||
297 | union { | ||
298 | float f; | ||
299 | uint32_t i; | ||
300 | } Value; | ||
301 | Value.i = 0; | ||
302 | // Emit the source select (2 bytes). For GPRs, this is the register index. | ||
303 | // For other potential instruction operands, (e.g. constant registers) the | ||
304 | // value of the source select is defined in the r600isa docs. | ||
305 | if (MO.isReg()) { | ||
306 | unsigned reg = MO.getReg(); | ||
307 | EmitTwoBytes(getHWReg(reg), OS); | ||
308 | if (reg == AMDGPU::ALU_LITERAL_X) { | ||
309 | unsigned ImmOpIndex = MI.getNumOperands() - 1; | ||
310 | MCOperand ImmOp = MI.getOperand(ImmOpIndex); | ||
311 | if (ImmOp.isFPImm()) { | ||
312 | Value.f = ImmOp.getFPImm(); | ||
313 | } else { | ||
314 | assert(ImmOp.isImm()); | ||
315 | Value.i = ImmOp.getImm(); | ||
316 | } | ||
317 | } | ||
318 | } else { | ||
319 | // XXX: Handle other operand types. | ||
320 | EmitTwoBytes(0, OS); | ||
321 | } | ||
322 | |||
323 | // Emit the source channel (1 byte) | ||
324 | if (MO.isReg()) { | ||
325 | EmitByte(getHWRegChan(MO.getReg()), OS); | ||
326 | } else { | ||
327 | EmitByte(0, OS); | ||
328 | } | ||
329 | |||
330 | // XXX: Emit isNegated (1 byte) | ||
331 | if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) | ||
332 | && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || | ||
333 | (MO.isReg() && | ||
334 | (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ | ||
335 | EmitByte(1, OS); | ||
336 | } else { | ||
337 | EmitByte(0, OS); | ||
338 | } | ||
339 | |||
340 | // Emit isAbsolute (1 byte) | ||
341 | if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { | ||
342 | EmitByte(1, OS); | ||
343 | } else { | ||
344 | EmitByte(0, OS); | ||
345 | } | ||
346 | |||
347 | // XXX: Emit relative addressing mode (1 byte) | ||
348 | EmitByte(0, OS); | ||
349 | |||
350 | // Emit kc_bank, This will be adjusted later by r600_asm | ||
351 | EmitByte(0, OS); | ||
352 | |||
353 | // Emit the literal value, if applicable (4 bytes). | ||
354 | Emit(Value.i, OS); | ||
355 | |||
356 | } | ||
357 | |||
358 | void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, | ||
359 | uint64_t &Value, raw_ostream &OS) const { | ||
360 | const MCOperand &MO = MI.getOperand(OpIdx); | ||
361 | union { | ||
362 | float f; | ||
363 | uint32_t i; | ||
364 | } InlineConstant; | ||
365 | InlineConstant.i = 0; | ||
366 | // Emit the source select (2 bytes). For GPRs, this is the register index. | ||
367 | // For other potential instruction operands, (e.g. constant registers) the | ||
368 | // value of the source select is defined in the r600isa docs. | ||
369 | if (MO.isReg()) { | ||
370 | unsigned Reg = MO.getReg(); | ||
371 | if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { | ||
372 | EmitByte(1, OS); | ||
373 | } else { | ||
374 | EmitByte(0, OS); | ||
375 | } | ||
376 | |||
377 | if (Reg == AMDGPU::ALU_LITERAL_X) { | ||
378 | unsigned ImmOpIndex = MI.getNumOperands() - 1; | ||
379 | MCOperand ImmOp = MI.getOperand(ImmOpIndex); | ||
380 | if (ImmOp.isFPImm()) { | ||
381 | InlineConstant.f = ImmOp.getFPImm(); | ||
382 | } else { | ||
383 | assert(ImmOp.isImm()); | ||
384 | InlineConstant.i = ImmOp.getImm(); | ||
385 | } | ||
386 | } | ||
387 | } else { | ||
388 | // XXX: Handle other operand types. | ||
389 | EmitTwoBytes(0, OS); | ||
390 | } | ||
391 | |||
392 | // source channel | ||
393 | uint64_t sourceChannelValue = getHWRegChan(MO.getReg()); | ||
394 | if (OpIdx == 1) | ||
395 | Value |= sourceChannelValue << 10; | ||
396 | if (OpIdx == 2) | ||
397 | Value |= sourceChannelValue << 23; | ||
398 | if (OpIdx == 3) | ||
399 | Value |= sourceChannelValue << 42; | ||
400 | |||
401 | // isNegated | ||
402 | if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) | ||
403 | && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || | ||
404 | (MO.isReg() && | ||
405 | (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ | ||
406 | if (OpIdx == 1) | ||
407 | Value |= 1ULL << 12; | ||
408 | else if (OpIdx == 2) | ||
409 | Value |= 1ULL << 25; | ||
410 | else if (OpIdx == 3) | ||
411 | Value |= 1ULL << 44; | ||
412 | } | ||
413 | |||
414 | // isAbsolute | ||
415 | if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { | ||
416 | assert(OpIdx < 3); | ||
417 | Value |= 1ULL << (32+OpIdx-1); | ||
418 | } | ||
419 | |||
420 | // XXX: relative addressing mode | ||
421 | // XXX: kc_bank | ||
422 | |||
423 | // Emit the literal value, if applicable (4 bytes). | ||
424 | Emit(InlineConstant.i, OS); | ||
425 | |||
426 | } | ||
427 | |||
428 | void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, | ||
429 | SmallVectorImpl<MCFixup> &Fixups, | ||
430 | raw_ostream &OS) const { | ||
431 | |||
432 | unsigned opcode = MI.getOpcode(); | ||
433 | bool hasOffsets = (opcode == AMDGPU::TEX_LD); | ||
434 | unsigned op_offset = hasOffsets ? 3 : 0; | ||
435 | int64_t sampler = MI.getOperand(op_offset+2).getImm(); | ||
436 | int64_t textureType = MI.getOperand(op_offset+3).getImm(); | ||
437 | unsigned srcSelect[4] = {0, 1, 2, 3}; | ||
438 | |||
439 | // Emit instruction type | ||
440 | EmitByte(1, OS); | ||
441 | |||
442 | // Emit instruction | ||
443 | EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); | ||
444 | |||
445 | // XXX: Emit resource id r600_shader.c uses sampler + 1. Why? | ||
446 | EmitByte(sampler + 1 + 1, OS); | ||
447 | |||
448 | // Emit source register | ||
449 | EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); | ||
450 | |||
451 | // XXX: Emit src isRelativeAddress | ||
452 | EmitByte(0, OS); | ||
453 | |||
454 | // Emit destination register | ||
455 | EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); | ||
456 | |||
457 | // XXX: Emit dst isRealtiveAddress | ||
458 | EmitByte(0, OS); | ||
459 | |||
460 | // XXX: Emit dst select | ||
461 | EmitByte(0, OS); // X | ||
462 | EmitByte(1, OS); // Y | ||
463 | EmitByte(2, OS); // Z | ||
464 | EmitByte(3, OS); // W | ||
465 | |||
466 | // XXX: Emit lod bias | ||
467 | EmitByte(0, OS); | ||
468 | |||
469 | // XXX: Emit coord types | ||
470 | unsigned coordType[4] = {1, 1, 1, 1}; | ||
471 | |||
472 | if (textureType == TEXTURE_RECT | ||
473 | || textureType == TEXTURE_SHADOWRECT) { | ||
474 | coordType[ELEMENT_X] = 0; | ||
475 | coordType[ELEMENT_Y] = 0; | ||
476 | } | ||
477 | |||
478 | if (textureType == TEXTURE_1D_ARRAY | ||
479 | || textureType == TEXTURE_SHADOW1D_ARRAY) { | ||
480 | if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) { | ||
481 | coordType[ELEMENT_Y] = 0; | ||
482 | } else { | ||
483 | coordType[ELEMENT_Z] = 0; | ||
484 | srcSelect[ELEMENT_Z] = ELEMENT_Y; | ||
485 | } | ||
486 | } else if (textureType == TEXTURE_2D_ARRAY | ||
487 | || textureType == TEXTURE_SHADOW2D_ARRAY) { | ||
488 | coordType[ELEMENT_Z] = 0; | ||
489 | } | ||
490 | |||
491 | for (unsigned i = 0; i < 4; i++) { | ||
492 | EmitByte(coordType[i], OS); | ||
493 | } | ||
494 | |||
495 | // XXX: Emit offsets | ||
496 | if (hasOffsets) | ||
497 | for (unsigned i = 2; i < 5; i++) | ||
498 | EmitByte(MI.getOperand(i).getImm()<<1, OS); | ||
499 | else | ||
500 | EmitNullBytes(3, OS); | ||
501 | |||
502 | // Emit sampler id | ||
503 | EmitByte(sampler, OS); | ||
504 | |||
505 | // XXX:Emit source select | ||
506 | if ((textureType == TEXTURE_SHADOW1D | ||
507 | || textureType == TEXTURE_SHADOW2D | ||
508 | || textureType == TEXTURE_SHADOWRECT | ||
509 | || textureType == TEXTURE_SHADOW1D_ARRAY) | ||
510 | && opcode != AMDGPU::TEX_SAMPLE_C_L | ||
511 | && opcode != AMDGPU::TEX_SAMPLE_C_LB) { | ||
512 | srcSelect[ELEMENT_W] = ELEMENT_Z; | ||
513 | } | ||
514 | |||
515 | for (unsigned i = 0; i < 4; i++) { | ||
516 | EmitByte(srcSelect[i], OS); | ||
517 | } | ||
518 | } | ||
519 | |||
520 | void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { | ||
521 | |||
522 | // Emit instruction type | ||
523 | EmitByte(INSTR_FC, OS); | ||
524 | |||
525 | // Emit SRC | ||
526 | unsigned NumOperands = MI.getNumOperands(); | ||
527 | if (NumOperands > 0) { | ||
528 | assert(NumOperands == 1); | ||
529 | EmitSrc(MI, 0, OS); | ||
530 | } else { | ||
531 | EmitNullBytes(SRC_BYTE_COUNT, OS); | ||
532 | } | ||
533 | |||
534 | // Emit FC Instruction | ||
535 | enum FCInstr instr; | ||
536 | switch (MI.getOpcode()) { | ||
537 | case AMDGPU::BREAK_LOGICALZ_f32: | ||
538 | instr = FC_BREAK; | ||
539 | break; | ||
540 | case AMDGPU::BREAK_LOGICALNZ_f32: | ||
541 | instr = FC_BREAK_NZ; | ||
542 | break; | ||
543 | case AMDGPU::BREAK_LOGICALNZ_i32: | ||
544 | instr = FC_BREAK_NZ_INT; | ||
545 | break; | ||
546 | case AMDGPU::BREAK_LOGICALZ_i32: | ||
547 | instr = FC_BREAK_Z_INT; | ||
548 | break; | ||
549 | case AMDGPU::CONTINUE_LOGICALNZ_f32: | ||
550 | case AMDGPU::CONTINUE_LOGICALNZ_i32: | ||
551 | instr = FC_CONTINUE; | ||
552 | break; | ||
553 | case AMDGPU::IF_LOGICALNZ_f32: | ||
554 | instr = FC_IF; | ||
555 | case AMDGPU::IF_LOGICALNZ_i32: | ||
556 | instr = FC_IF_INT; | ||
557 | break; | ||
558 | case AMDGPU::IF_LOGICALZ_f32: | ||
559 | abort(); | ||
560 | break; | ||
561 | case AMDGPU::ELSE: | ||
562 | instr = FC_ELSE; | ||
563 | break; | ||
564 | case AMDGPU::ENDIF: | ||
565 | instr = FC_ENDIF; | ||
566 | break; | ||
567 | case AMDGPU::ENDLOOP: | ||
568 | instr = FC_ENDLOOP; | ||
569 | break; | ||
570 | case AMDGPU::WHILELOOP: | ||
571 | instr = FC_BGNLOOP; | ||
572 | break; | ||
573 | default: | ||
574 | abort(); | ||
575 | break; | ||
576 | } | ||
577 | EmitByte(instr, OS); | ||
578 | } | ||
579 | |||
580 | void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, | ||
581 | raw_ostream &OS) const { | ||
582 | |||
583 | for (unsigned int i = 0; i < ByteCount; i++) { | ||
584 | EmitByte(0, OS); | ||
585 | } | ||
586 | } | ||
587 | |||
588 | void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { | ||
589 | OS.write((uint8_t) Byte & 0xff); | ||
590 | } | ||
591 | |||
592 | void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, | ||
593 | raw_ostream &OS) const { | ||
594 | OS.write((uint8_t) (Bytes & 0xff)); | ||
595 | OS.write((uint8_t) ((Bytes >> 8) & 0xff)); | ||
596 | } | ||
597 | |||
598 | void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { | ||
599 | for (unsigned i = 0; i < 4; i++) { | ||
600 | OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); | ||
601 | } | ||
602 | } | ||
603 | |||
604 | void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { | ||
605 | for (unsigned i = 0; i < 8; i++) { | ||
606 | EmitByte((Value >> (8 * i)) & 0xff, OS); | ||
607 | } | ||
608 | } | ||
609 | |||
610 | unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const { | ||
611 | switch(reg) { | ||
612 | case AMDGPU::ZERO: return 248; | ||
613 | case AMDGPU::ONE: | ||
614 | case AMDGPU::NEG_ONE: return 249; | ||
615 | case AMDGPU::ONE_INT: return 250; | ||
616 | case AMDGPU::HALF: | ||
617 | case AMDGPU::NEG_HALF: return 252; | ||
618 | case AMDGPU::ALU_LITERAL_X: return 253; | ||
619 | case AMDGPU::PREDICATE_BIT: | ||
620 | case AMDGPU::PRED_SEL_OFF: | ||
621 | case AMDGPU::PRED_SEL_ZERO: | ||
622 | case AMDGPU::PRED_SEL_ONE: | ||
623 | return 0; | ||
624 | default: return getHWRegIndexGen(reg); | ||
625 | } | ||
626 | } | ||
627 | |||
628 | unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { | ||
629 | switch(reg) { | ||
630 | case AMDGPU::ZERO: | ||
631 | case AMDGPU::ONE: | ||
632 | case AMDGPU::ONE_INT: | ||
633 | case AMDGPU::NEG_ONE: | ||
634 | case AMDGPU::HALF: | ||
635 | case AMDGPU::NEG_HALF: | ||
636 | case AMDGPU::ALU_LITERAL_X: | ||
637 | case AMDGPU::PREDICATE_BIT: | ||
638 | case AMDGPU::PRED_SEL_OFF: | ||
639 | case AMDGPU::PRED_SEL_ZERO: | ||
640 | case AMDGPU::PRED_SEL_ONE: | ||
641 | return 0; | ||
642 | default: return getHWRegChanGen(reg); | ||
643 | } | ||
644 | } | ||
645 | unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { | ||
646 | unsigned HWReg; | ||
647 | |||
648 | HWReg = getHWRegIndex(RegNo); | ||
649 | if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) { | ||
650 | HWReg += 512; | ||
651 | } | ||
652 | return HWReg; | ||
653 | } | ||
654 | |||
655 | uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, | ||
656 | const MCOperand &MO, | ||
657 | SmallVectorImpl<MCFixup> &Fixup) const { | ||
658 | if (MO.isReg()) { | ||
659 | return getHWRegIndex(MO.getReg()); | ||
660 | } else if (MO.isImm()) { | ||
661 | return MO.getImm(); | ||
662 | } else { | ||
663 | assert(0); | ||
664 | return 0; | ||
665 | } | ||
666 | } | ||
667 | |||
668 | //===----------------------------------------------------------------------===// | ||
669 | // Encoding helper functions | ||
670 | //===----------------------------------------------------------------------===// | ||
671 | |||
672 | bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { | ||
673 | switch(opcode) { | ||
674 | default: return false; | ||
675 | case AMDGPU::BREAK_LOGICALZ_f32: | ||
676 | case AMDGPU::BREAK_LOGICALNZ_i32: | ||
677 | case AMDGPU::BREAK_LOGICALZ_i32: | ||
678 | case AMDGPU::BREAK_LOGICALNZ_f32: | ||
679 | case AMDGPU::CONTINUE_LOGICALNZ_f32: | ||
680 | case AMDGPU::IF_LOGICALNZ_i32: | ||
681 | case AMDGPU::IF_LOGICALZ_f32: | ||
682 | case AMDGPU::ELSE: | ||
683 | case AMDGPU::ENDIF: | ||
684 | case AMDGPU::ENDLOOP: | ||
685 | case AMDGPU::IF_LOGICALNZ_f32: | ||
686 | case AMDGPU::WHILELOOP: | ||
687 | return true; | ||
688 | } | ||
689 | } | ||
690 | |||
691 | bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { | ||
692 | switch(opcode) { | ||
693 | default: return false; | ||
694 | case AMDGPU::TEX_LD: | ||
695 | case AMDGPU::TEX_GET_TEXTURE_RESINFO: | ||
696 | case AMDGPU::TEX_SAMPLE: | ||
697 | case AMDGPU::TEX_SAMPLE_C: | ||
698 | case AMDGPU::TEX_SAMPLE_L: | ||
699 | case AMDGPU::TEX_SAMPLE_C_L: | ||
700 | case AMDGPU::TEX_SAMPLE_LB: | ||
701 | case AMDGPU::TEX_SAMPLE_C_LB: | ||
702 | case AMDGPU::TEX_SAMPLE_G: | ||
703 | case AMDGPU::TEX_SAMPLE_C_G: | ||
704 | case AMDGPU::TEX_GET_GRADIENTS_H: | ||
705 | case AMDGPU::TEX_GET_GRADIENTS_V: | ||
706 | case AMDGPU::TEX_SET_GRADIENTS_H: | ||
707 | case AMDGPU::TEX_SET_GRADIENTS_V: | ||
708 | return true; | ||
709 | } | ||
710 | } | ||
711 | |||
712 | bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, | ||
713 | unsigned Flag) const { | ||
714 | const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); | ||
715 | unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); | ||
716 | if (FlagIndex == 0) { | ||
717 | return false; | ||
718 | } | ||
719 | assert(MI.getOperand(FlagIndex).isImm()); | ||
720 | return !!((MI.getOperand(FlagIndex).getImm() >> | ||
721 | (NUM_MO_FLAGS * Operand)) & Flag); | ||
722 | } | ||
723 | #define R600RegisterInfo R600MCCodeEmitter | ||
724 | #include "R600HwRegInfo.include" | ||
725 | #undef R600RegisterInfo | ||
726 | |||
727 | #include "AMDGPUGenMCCodeEmitter.inc" | ||
diff --git a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp deleted file mode 100644 index ca4b579dcce..00000000000 --- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp +++ /dev/null | |||
@@ -1,296 +0,0 @@ | |||
1 | //===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The SI code emitter produces machine code that can be executed directly on | ||
11 | // the GPU device. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | ||
16 | #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" | ||
17 | #include "llvm/MC/MCCodeEmitter.h" | ||
18 | #include "llvm/MC/MCContext.h" | ||
19 | #include "llvm/MC/MCInst.h" | ||
20 | #include "llvm/MC/MCInstrInfo.h" | ||
21 | #include "llvm/MC/MCRegisterInfo.h" | ||
22 | #include "llvm/MC/MCSubtargetInfo.h" | ||
23 | #include "llvm/Support/raw_ostream.h" | ||
24 | |||
25 | #define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) | ||
26 | #define SI_INSTR_FLAGS_ENCODING_MASK 0xf | ||
27 | |||
28 | // These must be kept in sync with SIInstructions.td and also the | ||
29 | // InstrEncodingInfo array in SIInstrInfo.cpp. | ||
30 | // | ||
31 | // NOTE: This enum is only used to identify the encoding type within LLVM, | ||
32 | // the actual encoding type that is part of the instruction format is different | ||
33 | namespace SIInstrEncodingType { | ||
34 | enum Encoding { | ||
35 | EXP = 0, | ||
36 | LDS = 1, | ||
37 | MIMG = 2, | ||
38 | MTBUF = 3, | ||
39 | MUBUF = 4, | ||
40 | SMRD = 5, | ||
41 | SOP1 = 6, | ||
42 | SOP2 = 7, | ||
43 | SOPC = 8, | ||
44 | SOPK = 9, | ||
45 | SOPP = 10, | ||
46 | VINTRP = 11, | ||
47 | VOP1 = 12, | ||
48 | VOP2 = 13, | ||
49 | VOP3 = 14, | ||
50 | VOPC = 15 | ||
51 | }; | ||
52 | } | ||
53 | |||
54 | using namespace llvm; | ||
55 | |||
56 | namespace { | ||
57 | class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { | ||
58 | SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT | ||
59 | void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT | ||
60 | const MCInstrInfo &MCII; | ||
61 | const MCSubtargetInfo &STI; | ||
62 | MCContext &Ctx; | ||
63 | |||
64 | public: | ||
65 | SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, | ||
66 | MCContext &ctx) | ||
67 | : MCII(mcii), STI(sti), Ctx(ctx) { } | ||
68 | |||
69 | ~SIMCCodeEmitter() { } | ||
70 | |||
71 | /// EncodeInstruction - Encode the instruction and write it to the OS. | ||
72 | virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, | ||
73 | SmallVectorImpl<MCFixup> &Fixups) const; | ||
74 | |||
75 | /// getMachineOpValue - Reutrn the encoding for an MCOperand. | ||
76 | virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, | ||
77 | SmallVectorImpl<MCFixup> &Fixups) const; | ||
78 | |||
79 | public: | ||
80 | |||
81 | /// GPRAlign - Encode a sequence of registers with the correct alignment. | ||
82 | unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; | ||
83 | |||
84 | /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used | ||
85 | virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, | ||
86 | SmallVectorImpl<MCFixup> &Fixup) const; | ||
87 | |||
88 | /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used | ||
89 | virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, | ||
90 | SmallVectorImpl<MCFixup> &Fixup) const; | ||
91 | |||
92 | /// SMRDmemriEncode - Encoding for SMRD indexed loads | ||
93 | virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, | ||
94 | SmallVectorImpl<MCFixup> &Fixup) const; | ||
95 | |||
96 | /// VOPPostEncode - Post-Encoder method for VOP instructions | ||
97 | virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; | ||
98 | |||
99 | private: | ||
100 | |||
101 | ///getEncodingType = Return this SIInstrEncodingType for this instruction. | ||
102 | unsigned getEncodingType(const MCInst &MI) const; | ||
103 | |||
104 | ///getEncodingBytes - Get then size in bytes of this instructions encoding. | ||
105 | unsigned getEncodingBytes(const MCInst &MI) const; | ||
106 | |||
107 | /// getRegBinaryCode - Returns the hardware encoding for a register | ||
108 | unsigned getRegBinaryCode(unsigned reg) const; | ||
109 | |||
110 | /// getHWRegNum - Generated function that returns the hardware encoding for | ||
111 | /// a register | ||
112 | unsigned getHWRegNum(unsigned reg) const; | ||
113 | |||
114 | }; | ||
115 | |||
116 | } // End anonymous namespace | ||
117 | |||
118 | MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, | ||
119 | const MCSubtargetInfo &STI, | ||
120 | MCContext &Ctx) { | ||
121 | return new SIMCCodeEmitter(MCII, STI, Ctx); | ||
122 | } | ||
123 | |||
124 | void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, | ||
125 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
126 | uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); | ||
127 | unsigned bytes = getEncodingBytes(MI); | ||
128 | for (unsigned i = 0; i < bytes; i++) { | ||
129 | OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, | ||
134 | const MCOperand &MO, | ||
135 | SmallVectorImpl<MCFixup> &Fixups) const { | ||
136 | if (MO.isReg()) { | ||
137 | return getRegBinaryCode(MO.getReg()); | ||
138 | } else if (MO.isImm()) { | ||
139 | return MO.getImm(); | ||
140 | } else if (MO.isFPImm()) { | ||
141 | // XXX: Not all instructions can use inline literals | ||
142 | // XXX: We should make sure this is a 32-bit constant | ||
143 | union { | ||
144 | float F; | ||
145 | uint32_t I; | ||
146 | } Imm; | ||
147 | Imm.F = MO.getFPImm(); | ||
148 | return Imm.I; | ||
149 | } else{ | ||
150 | llvm_unreachable("Encoding of this operand type is not supported yet."); | ||
151 | } | ||
152 | return 0; | ||
153 | } | ||
154 | |||
155 | //===----------------------------------------------------------------------===// | ||
156 | // Custom Operand Encodings | ||
157 | //===----------------------------------------------------------------------===// | ||
158 | |||
159 | unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, | ||
160 | unsigned shift) const { | ||
161 | unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg()); | ||
162 | return regCode >> shift; | ||
163 | return 0; | ||
164 | } | ||
165 | unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, | ||
166 | unsigned OpNo , | ||
167 | SmallVectorImpl<MCFixup> &Fixup) const { | ||
168 | return GPRAlign(MI, OpNo, 1); | ||
169 | } | ||
170 | |||
171 | unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, | ||
172 | unsigned OpNo, | ||
173 | SmallVectorImpl<MCFixup> &Fixup) const { | ||
174 | return GPRAlign(MI, OpNo, 2); | ||
175 | } | ||
176 | |||
177 | #define SMRD_OFFSET_MASK 0xff | ||
178 | #define SMRD_IMM_SHIFT 8 | ||
179 | #define SMRD_SBASE_MASK 0x3f | ||
180 | #define SMRD_SBASE_SHIFT 9 | ||
181 | /// SMRDmemriEncode - This function is responsibe for encoding the offset | ||
182 | /// and the base ptr for SMRD instructions it should return a bit string in | ||
183 | /// this format: | ||
184 | /// | ||
185 | /// OFFSET = bits{7-0} | ||
186 | /// IMM = bits{8} | ||
187 | /// SBASE = bits{14-9} | ||
188 | /// | ||
189 | uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo, | ||
190 | SmallVectorImpl<MCFixup> &Fixup) const { | ||
191 | uint32_t Encoding; | ||
192 | |||
193 | const MCOperand &OffsetOp = MI.getOperand(OpNo + 1); | ||
194 | |||
195 | //XXX: Use this function for SMRD loads with register offsets | ||
196 | assert(OffsetOp.isImm()); | ||
197 | |||
198 | Encoding = | ||
199 | (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK) | ||
200 | | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit | ||
201 | | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) | ||
202 | ; | ||
203 | |||
204 | return Encoding; | ||
205 | } | ||
206 | |||
207 | //===----------------------------------------------------------------------===// | ||
208 | // Post Encoder Callbacks | ||
209 | //===----------------------------------------------------------------------===// | ||
210 | |||
211 | uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{ | ||
212 | unsigned encodingType = getEncodingType(MI); | ||
213 | unsigned numSrcOps; | ||
214 | unsigned vgprBitOffset; | ||
215 | |||
216 | if (encodingType == SIInstrEncodingType::VOP3) { | ||
217 | numSrcOps = 3; | ||
218 | vgprBitOffset = 32; | ||
219 | } else { | ||
220 | numSrcOps = 1; | ||
221 | vgprBitOffset = 0; | ||
222 | } | ||
223 | |||
224 | // Add one to skip over the destination reg operand. | ||
225 | for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { | ||
226 | const MCOperand &MO = MI.getOperand(opIdx); | ||
227 | if (MO.isReg()) { | ||
228 | unsigned reg = MI.getOperand(opIdx).getReg(); | ||
229 | if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) || | ||
230 | AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) { | ||
231 | Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; | ||
232 | } | ||
233 | } else if (MO.isFPImm()) { | ||
234 | union { | ||
235 | float f; | ||
236 | uint32_t i; | ||
237 | } Imm; | ||
238 | // XXX: Not all instructions can use inline literals | ||
239 | // XXX: We should make sure this is a 32-bit constant | ||
240 | Imm.f = MO.getFPImm(); | ||
241 | Value |= ((uint64_t)Imm.i) << 32; | ||
242 | } | ||
243 | } | ||
244 | return Value; | ||
245 | } | ||
246 | |||
247 | //===----------------------------------------------------------------------===// | ||
248 | // Encoding helper functions | ||
249 | //===----------------------------------------------------------------------===// | ||
250 | |||
251 | unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { | ||
252 | return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; | ||
253 | } | ||
254 | |||
255 | unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { | ||
256 | |||
257 | // These instructions aren't real instructions with an encoding type, so | ||
258 | // we need to manually specify their size. | ||
259 | switch (MI.getOpcode()) { | ||
260 | default: break; | ||
261 | case AMDGPU::SI_LOAD_LITERAL_I32: | ||
262 | case AMDGPU::SI_LOAD_LITERAL_F32: | ||
263 | return 4; | ||
264 | } | ||
265 | |||
266 | unsigned encoding_type = getEncodingType(MI); | ||
267 | switch (encoding_type) { | ||
268 | case SIInstrEncodingType::EXP: | ||
269 | case SIInstrEncodingType::LDS: | ||
270 | case SIInstrEncodingType::MUBUF: | ||
271 | case SIInstrEncodingType::MTBUF: | ||
272 | case SIInstrEncodingType::MIMG: | ||
273 | case SIInstrEncodingType::VOP3: | ||
274 | return 8; | ||
275 | default: | ||
276 | return 4; | ||
277 | } | ||
278 | } | ||
279 | |||
280 | |||
281 | unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { | ||
282 | switch (reg) { | ||
283 | case AMDGPU::VCC: return 106; | ||
284 | case AMDGPU::M0: return 124; | ||
285 | case AMDGPU::EXEC: return 126; | ||
286 | case AMDGPU::EXEC_LO: return 126; | ||
287 | case AMDGPU::EXEC_HI: return 127; | ||
288 | case AMDGPU::SREG_LIT_0: return 128; | ||
289 | case AMDGPU::SI_LITERAL_CONSTANT: return 255; | ||
290 | default: return getHWRegNum(reg); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | #define SIRegisterInfo SIMCCodeEmitter | ||
295 | #include "SIRegisterGetHWRegNum.inc" | ||
296 | #undef SIRegisterInfo | ||
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile index 05dc518a9aa..7934c712195 100644 --- a/src/gallium/drivers/radeon/Makefile +++ b/src/gallium/drivers/radeon/Makefile | |||
@@ -8,74 +8,8 @@ LIBNAME = radeon | |||
8 | 8 | ||
9 | LIBRARY_INCLUDES = -I$(TOP)/include | 9 | LIBRARY_INCLUDES = -I$(TOP)/include |
10 | 10 | ||
11 | TBLGEN = $(LLVM_BINDIR)/llvm-tblgen | ||
12 | |||
13 | CXXFLAGS+= $(LLVM_CXXFLAGS) | 11 | CXXFLAGS+= $(LLVM_CXXFLAGS) |
14 | 12 | ||
15 | ifeq ($(LLVM_VERSION),3.1) | ||
16 | CPP_SOURCES += $(LLVM_CPP_SOURCES) | ||
17 | GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES) | ||
18 | else | ||
19 | CXXFLAGS+= -DEXTERNAL_LLVM | ||
20 | endif | ||
21 | |||
22 | include ../../Makefile.template | 13 | include ../../Makefile.template |
23 | 14 | ||
24 | CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) | 15 | CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) |
25 | |||
26 | tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3 | ||
27 | |||
28 | HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td) | ||
29 | |||
30 | SIRegisterInfo.td: SIGenRegisterInfo.pl | ||
31 | $(PERL) $^ > $@ | ||
32 | |||
33 | SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl | ||
34 | $(PERL) $^ $@ > /dev/null | ||
35 | |||
36 | R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td | ||
37 | ifeq ($(HAVE_LLVM_INTRINSICS),) | ||
38 | cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td | ||
39 | else | ||
40 | cp R600IntrinsicsOpenCL.td R600Intrinsics.td | ||
41 | endif | ||
42 | |||
43 | R600RegisterInfo.td: R600GenRegisterInfo.pl | ||
44 | $(PERL) $^ > $@ | ||
45 | |||
46 | AMDGPUGenRegisterInfo.inc: $(TD_FILES) | ||
47 | $(call tablegen, -gen-register-info, AMDGPU.td, $@) | ||
48 | |||
49 | AMDGPUGenInstrInfo.inc: $(TD_FILES) | ||
50 | $(call tablegen, -gen-instr-info, AMDGPU.td, $@) | ||
51 | |||
52 | AMDGPUGenAsmWriter.inc: $(TD_FILES) | ||
53 | $(call tablegen, -gen-asm-writer, AMDGPU.td, $@) | ||
54 | |||
55 | AMDGPUGenDAGISel.inc: $(TD_FILES) | ||
56 | $(call tablegen, -gen-dag-isel, AMDGPU.td, $@) | ||
57 | |||
58 | AMDGPUGenCallingConv.inc: $(TD_FILES) | ||
59 | $(call tablegen, -gen-callingconv, AMDGPU.td, $@) | ||
60 | |||
61 | AMDGPUGenSubtargetInfo.inc: $(TD_FILES) | ||
62 | $(call tablegen, -gen-subtarget, AMDGPU.td, $@) | ||
63 | |||
64 | AMDGPUGenEDInfo.inc: $(TD_FILES) | ||
65 | $(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@) | ||
66 | |||
67 | AMDGPUGenIntrinsics.inc: $(TD_FILES) | ||
68 | $(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@) | ||
69 | |||
70 | AMDGPUGenCodeEmitter.inc: $(TD_FILES) | ||
71 | $(call tablegen, -gen-emitter, AMDGPU.td, $@) | ||
72 | |||
73 | AMDGPUGenMCCodeEmitter.inc: $(TD_FILES) | ||
74 | $(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@) | ||
75 | |||
76 | AMDGPUGenDFAPacketizer.inc: $(TD_FILES) | ||
77 | $(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@) | ||
78 | |||
79 | LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser) | ||
80 | loader: loader.o libradeon.a | ||
81 | gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm | ||
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 5e793422d66..45d2e8f2e76 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources | |||
@@ -1,86 +1,3 @@ | |||
1 | |||
2 | TD_FILES := \ | ||
3 | AMDGPU.td \ | ||
4 | AMDGPUInstrInfo.td \ | ||
5 | AMDGPUInstructions.td \ | ||
6 | AMDGPUIntrinsics.td \ | ||
7 | AMDGPURegisterInfo.td \ | ||
8 | AMDILBase.td \ | ||
9 | AMDILInstrInfo.td \ | ||
10 | AMDILIntrinsics.td \ | ||
11 | AMDILRegisterInfo.td \ | ||
12 | Processors.td \ | ||
13 | R600Instructions.td \ | ||
14 | R600Intrinsics.td \ | ||
15 | R600IntrinsicsNoOpenCL.td \ | ||
16 | R600IntrinsicsOpenCL.td \ | ||
17 | R600RegisterInfo.td \ | ||
18 | R600Schedule.td \ | ||
19 | SIInstrFormats.td \ | ||
20 | SIInstrInfo.td \ | ||
21 | SIInstructions.td \ | ||
22 | SIIntrinsics.td \ | ||
23 | SIRegisterInfo.td \ | ||
24 | SISchedule.td | ||
25 | |||
26 | LLVM_GENERATED_SOURCES := \ | ||
27 | R600Intrinsics.td \ | ||
28 | R600RegisterInfo.td \ | ||
29 | SIRegisterInfo.td \ | ||
30 | SIRegisterGetHWRegNum.inc \ | ||
31 | AMDGPUGenRegisterInfo.inc \ | ||
32 | AMDGPUGenInstrInfo.inc \ | ||
33 | AMDGPUGenAsmWriter.inc \ | ||
34 | AMDGPUGenDAGISel.inc \ | ||
35 | AMDGPUGenCallingConv.inc \ | ||
36 | AMDGPUGenSubtargetInfo.inc \ | ||
37 | AMDGPUGenEDInfo.inc \ | ||
38 | AMDGPUGenIntrinsics.inc \ | ||
39 | AMDGPUGenCodeEmitter.inc \ | ||
40 | AMDGPUGenMCCodeEmitter.inc \ | ||
41 | AMDGPUGenDFAPacketizer.inc | ||
42 | |||
43 | LLVM_CPP_SOURCES := \ | ||
44 | AMDIL7XXDevice.cpp \ | ||
45 | AMDILCFGStructurizer.cpp \ | ||
46 | AMDILDevice.cpp \ | ||
47 | AMDILDeviceInfo.cpp \ | ||
48 | AMDILEvergreenDevice.cpp \ | ||
49 | AMDILFrameLowering.cpp \ | ||
50 | AMDILIntrinsicInfo.cpp \ | ||
51 | AMDILISelDAGToDAG.cpp \ | ||
52 | AMDILISelLowering.cpp \ | ||
53 | AMDILNIDevice.cpp \ | ||
54 | AMDILPeepholeOptimizer.cpp \ | ||
55 | AMDILSIDevice.cpp \ | ||
56 | AMDGPUAsmPrinter.cpp \ | ||
57 | AMDGPUMCInstLower.cpp \ | ||
58 | AMDGPUSubtarget.cpp \ | ||
59 | AMDGPUTargetMachine.cpp \ | ||
60 | AMDGPUISelLowering.cpp \ | ||
61 | AMDGPUConvertToISA.cpp \ | ||
62 | AMDGPUInstrInfo.cpp \ | ||
63 | AMDGPURegisterInfo.cpp \ | ||
64 | R600ExpandSpecialInstrs.cpp \ | ||
65 | R600ISelLowering.cpp \ | ||
66 | R600InstrInfo.cpp \ | ||
67 | R600MachineFunctionInfo.cpp \ | ||
68 | R600RegisterInfo.cpp \ | ||
69 | SIAssignInterpRegs.cpp \ | ||
70 | SIInstrInfo.cpp \ | ||
71 | SIISelLowering.cpp \ | ||
72 | SILowerLiteralConstants.cpp \ | ||
73 | SILowerFlowControl.cpp \ | ||
74 | SIMachineFunctionInfo.cpp \ | ||
75 | SIRegisterInfo.cpp \ | ||
76 | InstPrinter/AMDGPUInstPrinter.cpp \ | ||
77 | MCTargetDesc/AMDGPUMCAsmInfo.cpp \ | ||
78 | MCTargetDesc/AMDGPUAsmBackend.cpp \ | ||
79 | MCTargetDesc/AMDGPUMCTargetDesc.cpp \ | ||
80 | MCTargetDesc/SIMCCodeEmitter.cpp \ | ||
81 | MCTargetDesc/R600MCCodeEmitter.cpp \ | ||
82 | TargetInfo/AMDGPUTargetInfo.cpp \ | ||
83 | |||
84 | CPP_SOURCES := \ | 1 | CPP_SOURCES := \ |
85 | radeon_llvm_emit.cpp | 2 | radeon_llvm_emit.cpp |
86 | 3 | ||
diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td deleted file mode 100644 index 3469f828fc0..00000000000 --- a/src/gallium/drivers/radeon/Processors.td +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | //===-- Processors.td - TODO: Add brief description -------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // AMDIL processors supported. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> | ||
15 | : Processor<Name, itin, Features>; | ||
16 | def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; | ||
17 | def : Proc<"rv710", R600_EG_Itin, []>; | ||
18 | def : Proc<"rv730", R600_EG_Itin, []>; | ||
19 | def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; | ||
20 | def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
21 | def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
22 | def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
23 | def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; | ||
24 | def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
25 | def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
26 | def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; | ||
27 | def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; | ||
28 | def : Proc<"SI", SI_Itin, [Feature64BitPtr]>; | ||
29 | |||
diff --git a/src/gallium/drivers/radeon/R600Defines.h b/src/gallium/drivers/radeon/R600Defines.h deleted file mode 100644 index 20c357cc15f..00000000000 --- a/src/gallium/drivers/radeon/R600Defines.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | //===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | |||
11 | // Operand Flags | ||
12 | #define MO_FLAG_CLAMP (1 << 0) | ||
13 | #define MO_FLAG_NEG (1 << 1) | ||
14 | #define MO_FLAG_ABS (1 << 2) | ||
15 | #define MO_FLAG_MASK (1 << 3) | ||
16 | #define MO_FLAG_PUSH (1 << 4) | ||
17 | #define MO_FLAG_NOT_LAST (1 << 5) | ||
18 | #define NUM_MO_FLAGS 6 | ||
19 | |||
20 | // Helper for finding getting the operand index for the instruction flags | ||
21 | // operand. | ||
22 | #define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3) | ||
23 | |||
24 | namespace R600_InstFlag { | ||
25 | enum TIF { | ||
26 | TRANS_ONLY = (1 << 0), | ||
27 | TEX = (1 << 1), | ||
28 | REDUCTION = (1 << 2), | ||
29 | FC = (1 << 3), | ||
30 | TRIG = (1 << 4), | ||
31 | OP3 = (1 << 5), | ||
32 | VECTOR = (1 << 6) | ||
33 | //FlagOperand bits 7, 8 | ||
34 | }; | ||
35 | } | ||
diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp deleted file mode 100644 index d6184e55302..00000000000 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ /dev/null | |||
@@ -1,292 +0,0 @@ | |||
1 | //===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // Vector, Reduction, and Cube instructions need to fill the entire instruction | ||
10 | // group to work correctly. This pass expands these individual instructions | ||
11 | // into several instructions that will completely fill the instruction group. | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPU.h" | ||
15 | #include "R600Defines.h" | ||
16 | #include "R600InstrInfo.h" | ||
17 | #include "R600RegisterInfo.h" | ||
18 | #include "R600MachineFunctionInfo.h" | ||
19 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
21 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
22 | |||
23 | using namespace llvm; | ||
24 | |||
25 | namespace { | ||
26 | |||
27 | class R600ExpandSpecialInstrsPass : public MachineFunctionPass { | ||
28 | |||
29 | private: | ||
30 | static char ID; | ||
31 | const R600InstrInfo *TII; | ||
32 | |||
33 | bool ExpandInputPerspective(MachineInstr& MI); | ||
34 | bool ExpandInputConstant(MachineInstr& MI); | ||
35 | |||
36 | public: | ||
37 | R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), | ||
38 | TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } | ||
39 | |||
40 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
41 | |||
42 | const char *getPassName() const { | ||
43 | return "R600 Expand special instructions pass"; | ||
44 | } | ||
45 | }; | ||
46 | |||
47 | } // End anonymous namespace | ||
48 | |||
49 | char R600ExpandSpecialInstrsPass::ID = 0; | ||
50 | |||
51 | FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { | ||
52 | return new R600ExpandSpecialInstrsPass(TM); | ||
53 | } | ||
54 | |||
55 | bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) | ||
56 | { | ||
57 | const R600RegisterInfo &TRI = TII->getRegisterInfo(); | ||
58 | if (MI.getOpcode() != AMDGPU::input_perspective) | ||
59 | return false; | ||
60 | |||
61 | MachineBasicBlock::iterator I = &MI; | ||
62 | unsigned DstReg = MI.getOperand(0).getReg(); | ||
63 | R600MachineFunctionInfo *MFI = MI.getParent()->getParent() | ||
64 | ->getInfo<R600MachineFunctionInfo>(); | ||
65 | unsigned IJIndexBase; | ||
66 | |||
67 | // In Evergreen ISA doc section 8.3.2 : | ||
68 | // We need to interpolate XY and ZW in two different instruction groups. | ||
69 | // An INTERP_* must occupy all 4 slots of an instruction group. | ||
70 | // Output of INTERP_XY is written in X,Y slots | ||
71 | // Output of INTERP_ZW is written in Z,W slots | ||
72 | // | ||
73 | // Thus interpolation requires the following sequences : | ||
74 | // | ||
75 | // AnyGPR.x = INTERP_ZW; (Write Masked Out) | ||
76 | // AnyGPR.y = INTERP_ZW; (Write Masked Out) | ||
77 | // DstGPR.z = INTERP_ZW; | ||
78 | // DstGPR.w = INTERP_ZW; (End of first IG) | ||
79 | // DstGPR.x = INTERP_XY; | ||
80 | // DstGPR.y = INTERP_XY; | ||
81 | // AnyGPR.z = INTERP_XY; (Write Masked Out) | ||
82 | // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) | ||
83 | // | ||
84 | switch (MI.getOperand(1).getImm()) { | ||
85 | case 0: | ||
86 | IJIndexBase = MFI->GetIJPerspectiveIndex(); | ||
87 | break; | ||
88 | case 1: | ||
89 | IJIndexBase = MFI->GetIJLinearIndex(); | ||
90 | break; | ||
91 | default: | ||
92 | assert(0 && "Unknow ij index"); | ||
93 | } | ||
94 | |||
95 | for (unsigned i = 0; i < 8; i++) { | ||
96 | unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( | ||
97 | 2 * IJIndexBase + ((i + 1) % 2)); | ||
98 | unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( | ||
99 | 4 * MI.getOperand(2).getImm()); | ||
100 | |||
101 | unsigned Sel; | ||
102 | switch (i % 4) { | ||
103 | case 0:Sel = AMDGPU::sel_x;break; | ||
104 | case 1:Sel = AMDGPU::sel_y;break; | ||
105 | case 2:Sel = AMDGPU::sel_z;break; | ||
106 | case 3:Sel = AMDGPU::sel_w;break; | ||
107 | default:break; | ||
108 | } | ||
109 | |||
110 | unsigned Res = TRI.getSubReg(DstReg, Sel); | ||
111 | |||
112 | const MCInstrDesc &Opcode = (i < 4)? | ||
113 | TII->get(AMDGPU::INTERP_ZW): | ||
114 | TII->get(AMDGPU::INTERP_XY); | ||
115 | |||
116 | MachineInstr *NewMI = BuildMI(*(MI.getParent()), | ||
117 | I, MI.getParent()->findDebugLoc(I), | ||
118 | Opcode, Res) | ||
119 | .addReg(IJIndex) | ||
120 | .addReg(ReadReg) | ||
121 | .addImm(0); | ||
122 | |||
123 | if (!(i> 1 && i < 6)) { | ||
124 | TII->addFlag(NewMI, 0, MO_FLAG_MASK); | ||
125 | } | ||
126 | |||
127 | if (i % 4 != 3) | ||
128 | TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); | ||
129 | } | ||
130 | |||
131 | MI.eraseFromParent(); | ||
132 | |||
133 | return true; | ||
134 | } | ||
135 | |||
136 | bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) | ||
137 | { | ||
138 | const R600RegisterInfo &TRI = TII->getRegisterInfo(); | ||
139 | if (MI.getOpcode() != AMDGPU::input_constant) | ||
140 | return false; | ||
141 | |||
142 | MachineBasicBlock::iterator I = &MI; | ||
143 | unsigned DstReg = MI.getOperand(0).getReg(); | ||
144 | |||
145 | for (unsigned i = 0; i < 4; i++) { | ||
146 | unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( | ||
147 | 4 * MI.getOperand(1).getImm() + i); | ||
148 | |||
149 | unsigned Sel; | ||
150 | switch (i % 4) { | ||
151 | case 0:Sel = AMDGPU::sel_x;break; | ||
152 | case 1:Sel = AMDGPU::sel_y;break; | ||
153 | case 2:Sel = AMDGPU::sel_z;break; | ||
154 | case 3:Sel = AMDGPU::sel_w;break; | ||
155 | default:break; | ||
156 | } | ||
157 | |||
158 | unsigned Res = TRI.getSubReg(DstReg, Sel); | ||
159 | |||
160 | MachineInstr *NewMI = BuildMI(*(MI.getParent()), | ||
161 | I, MI.getParent()->findDebugLoc(I), | ||
162 | TII->get(AMDGPU::INTERP_LOAD_P0), Res) | ||
163 | .addReg(ReadReg) | ||
164 | .addImm(0); | ||
165 | |||
166 | if (i % 4 != 3) | ||
167 | TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); | ||
168 | } | ||
169 | |||
170 | MI.eraseFromParent(); | ||
171 | |||
172 | return true; | ||
173 | } | ||
174 | |||
175 | bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { | ||
176 | |||
177 | const R600RegisterInfo &TRI = TII->getRegisterInfo(); | ||
178 | |||
179 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||
180 | BB != BB_E; ++BB) { | ||
181 | MachineBasicBlock &MBB = *BB; | ||
182 | MachineBasicBlock::iterator I = MBB.begin(); | ||
183 | while (I != MBB.end()) { | ||
184 | MachineInstr &MI = *I; | ||
185 | I = llvm::next(I); | ||
186 | |||
187 | if (ExpandInputPerspective(MI)) | ||
188 | continue; | ||
189 | if (ExpandInputConstant(MI)) | ||
190 | continue; | ||
191 | |||
192 | bool IsReduction = TII->isReductionOp(MI.getOpcode()); | ||
193 | bool IsVector = TII->isVector(MI); | ||
194 | bool IsCube = TII->isCubeOp(MI.getOpcode()); | ||
195 | if (!IsReduction && !IsVector && !IsCube) { | ||
196 | continue; | ||
197 | } | ||
198 | |||
199 | // Expand the instruction | ||
200 | // | ||
201 | // Reduction instructions: | ||
202 | // T0_X = DP4 T1_XYZW, T2_XYZW | ||
203 | // becomes: | ||
204 | // TO_X = DP4 T1_X, T2_X | ||
205 | // TO_Y (write masked) = DP4 T1_Y, T2_Y | ||
206 | // TO_Z (write masked) = DP4 T1_Z, T2_Z | ||
207 | // TO_W (write masked) = DP4 T1_W, T2_W | ||
208 | // | ||
209 | // Vector instructions: | ||
210 | // T0_X = MULLO_INT T1_X, T2_X | ||
211 | // becomes: | ||
212 | // T0_X = MULLO_INT T1_X, T2_X | ||
213 | // T0_Y (write masked) = MULLO_INT T1_X, T2_X | ||
214 | // T0_Z (write masked) = MULLO_INT T1_X, T2_X | ||
215 | // T0_W (write masked) = MULLO_INT T1_X, T2_X | ||
216 | // | ||
217 | // Cube instructions: | ||
218 | // T0_XYZW = CUBE T1_XYZW | ||
219 | // becomes: | ||
220 | // TO_X = CUBE T1_Z, T1_Y | ||
221 | // T0_Y = CUBE T1_Z, T1_X | ||
222 | // T0_Z = CUBE T1_X, T1_Z | ||
223 | // T0_W = CUBE T1_Y, T1_Z | ||
224 | for (unsigned Chan = 0; Chan < 4; Chan++) { | ||
225 | unsigned DstReg = MI.getOperand(0).getReg(); | ||
226 | unsigned Src0 = MI.getOperand(1).getReg(); | ||
227 | unsigned Src1 = 0; | ||
228 | |||
229 | // Determine the correct source registers | ||
230 | if (!IsCube) { | ||
231 | Src1 = MI.getOperand(2).getReg(); | ||
232 | } | ||
233 | if (IsReduction) { | ||
234 | unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); | ||
235 | Src0 = TRI.getSubReg(Src0, SubRegIndex); | ||
236 | Src1 = TRI.getSubReg(Src1, SubRegIndex); | ||
237 | } else if (IsCube) { | ||
238 | static const int CubeSrcSwz[] = {2, 2, 0, 1}; | ||
239 | unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); | ||
240 | unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); | ||
241 | Src1 = TRI.getSubReg(Src0, SubRegIndex1); | ||
242 | Src0 = TRI.getSubReg(Src0, SubRegIndex0); | ||
243 | } | ||
244 | |||
245 | // Determine the correct destination registers; | ||
246 | unsigned Flags = 0; | ||
247 | if (IsCube) { | ||
248 | unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); | ||
249 | DstReg = TRI.getSubReg(DstReg, SubRegIndex); | ||
250 | } else { | ||
251 | // Mask the write if the original instruction does not write to | ||
252 | // the current Channel. | ||
253 | Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); | ||
254 | unsigned DstBase = TRI.getHWRegIndex(DstReg); | ||
255 | DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); | ||
256 | } | ||
257 | |||
258 | // Set the IsLast bit | ||
259 | Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0); | ||
260 | |||
261 | // Add the new instruction | ||
262 | unsigned Opcode; | ||
263 | if (IsCube) { | ||
264 | switch (MI.getOpcode()) { | ||
265 | case AMDGPU::CUBE_r600_pseudo: | ||
266 | Opcode = AMDGPU::CUBE_r600_real; | ||
267 | break; | ||
268 | case AMDGPU::CUBE_eg_pseudo: | ||
269 | Opcode = AMDGPU::CUBE_eg_real; | ||
270 | break; | ||
271 | default: | ||
272 | assert(!"Unknown CUBE instruction"); | ||
273 | Opcode = 0; | ||
274 | break; | ||
275 | } | ||
276 | } else { | ||
277 | Opcode = MI.getOpcode(); | ||
278 | } | ||
279 | MachineInstr *NewMI = | ||
280 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) | ||
281 | .addReg(Src0) | ||
282 | .addReg(Src1) | ||
283 | .addImm(0); // Flag | ||
284 | |||
285 | NewMI->setIsInsideBundle(Chan != 0); | ||
286 | TII->addFlag(NewMI, 0, Flags); | ||
287 | } | ||
288 | MI.eraseFromParent(); | ||
289 | } | ||
290 | } | ||
291 | return false; | ||
292 | } | ||
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl deleted file mode 100644 index c0a05f54cae..00000000000 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ /dev/null | |||
@@ -1,206 +0,0 @@ | |||
1 | #===-- R600GenRegisterInfo.pl - Script for generating register info files --===# | ||
2 | # | ||
3 | # The LLVM Compiler Infrastructure | ||
4 | # | ||
5 | # This file is distributed under the University of Illinois Open Source | ||
6 | # License. See LICENSE.TXT for details. | ||
7 | # | ||
8 | #===------------------------------------------------------------------------===# | ||
9 | # | ||
10 | # This perl script prints to stdout .td code to be used as R600RegisterInfo.td | ||
11 | # it also generates a file called R600HwRegInfo.include, which contains helper | ||
12 | # functions for determining the hw encoding of registers. | ||
13 | # | ||
14 | #===------------------------------------------------------------------------===# | ||
15 | |||
16 | use strict; | ||
17 | use warnings; | ||
18 | |||
19 | use constant CONST_REG_COUNT => 512; | ||
20 | use constant TEMP_REG_COUNT => 128; | ||
21 | |||
22 | my $CREG_MAX = CONST_REG_COUNT - 1; | ||
23 | my $TREG_MAX = TEMP_REG_COUNT - 1; | ||
24 | |||
25 | print <<STRING; | ||
26 | |||
27 | class R600Reg <string name> : Register<name> { | ||
28 | let Namespace = "AMDGPU"; | ||
29 | } | ||
30 | |||
31 | class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { | ||
32 | let Namespace = "AMDGPU"; | ||
33 | let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; | ||
34 | } | ||
35 | |||
36 | STRING | ||
37 | |||
38 | my $i; | ||
39 | |||
40 | ### REG DEFS ### | ||
41 | |||
42 | my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C"); | ||
43 | my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T"); | ||
44 | |||
45 | my @t128reg; | ||
46 | my @treg_x; | ||
47 | for (my $i = 0; $i < TEMP_REG_COUNT; $i++) { | ||
48 | my $name = "T$i\_XYZW"; | ||
49 | print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n}; | ||
50 | $t128reg[$i] = $name; | ||
51 | $treg_x[$i] = "T$i\_X"; | ||
52 | } | ||
53 | |||
54 | my $treg_string = join(",", @treg_list); | ||
55 | my $creg_list = join(",", @creg_list); | ||
56 | my $t128_string = join(",", @t128reg); | ||
57 | my $treg_x_string = join(",", @treg_x); | ||
58 | print <<STRING; | ||
59 | |||
60 | class RegSet <dag s> { | ||
61 | dag set = s; | ||
62 | } | ||
63 | |||
64 | def ZERO : R600Reg<"0.0">; | ||
65 | def HALF : R600Reg<"0.5">; | ||
66 | def ONE : R600Reg<"1.0">; | ||
67 | def ONE_INT : R600Reg<"1">; | ||
68 | def NEG_HALF : R600Reg<"-0.5">; | ||
69 | def NEG_ONE : R600Reg<"-1.0">; | ||
70 | def PV_X : R600Reg<"pv.x">; | ||
71 | def ALU_LITERAL_X : R600Reg<"literal.x">; | ||
72 | def PREDICATE_BIT : R600Reg<"PredicateBit">; | ||
73 | def PRED_SEL_OFF: R600Reg<"Pred_sel_off">; | ||
74 | def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">; | ||
75 | def PRED_SEL_ONE : R600Reg<"Pred_sel_one">; | ||
76 | |||
77 | def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add | ||
78 | $creg_list)>; | ||
79 | |||
80 | def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add | ||
81 | $treg_string)>; | ||
82 | |||
83 | def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add | ||
84 | $treg_x_string)>; | ||
85 | |||
86 | def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add | ||
87 | R600_TReg32, | ||
88 | R600_CReg32, | ||
89 | ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; | ||
90 | |||
91 | def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add | ||
92 | PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; | ||
93 | |||
94 | def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add | ||
95 | PREDICATE_BIT)>; | ||
96 | |||
97 | def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add | ||
98 | $t128_string)> | ||
99 | { | ||
100 | let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)]; | ||
101 | let CopyCost = -1; | ||
102 | } | ||
103 | |||
104 | STRING | ||
105 | |||
106 | my %index_map; | ||
107 | my %chan_map; | ||
108 | |||
109 | for ($i = 0; $i <= $#creg_list; $i++) { | ||
110 | push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]); | ||
111 | push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]); | ||
112 | } | ||
113 | |||
114 | for ($i = 0; $i <= $#treg_list; $i++) { | ||
115 | push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]); | ||
116 | push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]); | ||
117 | } | ||
118 | |||
119 | for ($i = 0; $i <= $#t128reg; $i++) { | ||
120 | push(@{$index_map{$i}}, $t128reg[$i]); | ||
121 | push(@{$chan_map{'X'}}, $t128reg[$i]); | ||
122 | } | ||
123 | |||
124 | open(OUTFILE, ">", "R600HwRegInfo.include"); | ||
125 | |||
126 | print OUTFILE <<STRING; | ||
127 | |||
128 | unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const | ||
129 | { | ||
130 | switch(reg) { | ||
131 | default: assert(!"Unknown register"); return 0; | ||
132 | STRING | ||
133 | foreach my $key (keys(%index_map)) { | ||
134 | foreach my $reg (@{$index_map{$key}}) { | ||
135 | print OUTFILE " case AMDGPU::$reg:\n"; | ||
136 | } | ||
137 | print OUTFILE " return $key;\n\n"; | ||
138 | } | ||
139 | |||
140 | print OUTFILE " }\n}\n\n"; | ||
141 | |||
142 | print OUTFILE <<STRING; | ||
143 | |||
144 | unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const | ||
145 | { | ||
146 | switch(reg) { | ||
147 | default: assert(!"Unknown register"); return 0; | ||
148 | STRING | ||
149 | |||
150 | foreach my $key (keys(%chan_map)) { | ||
151 | foreach my $reg (@{$chan_map{$key}}) { | ||
152 | print OUTFILE " case AMDGPU::$reg:\n"; | ||
153 | } | ||
154 | my $val; | ||
155 | if ($key eq 'X') { | ||
156 | $val = 0; | ||
157 | } elsif ($key eq 'Y') { | ||
158 | $val = 1; | ||
159 | } elsif ($key eq 'Z') { | ||
160 | $val = 2; | ||
161 | } elsif ($key eq 'W') { | ||
162 | $val = 3; | ||
163 | } else { | ||
164 | die("Unknown chan value; $key"); | ||
165 | } | ||
166 | print OUTFILE " return $val;\n\n"; | ||
167 | } | ||
168 | |||
169 | print OUTFILE " }\n}\n\n"; | ||
170 | |||
171 | sub print_reg_defs { | ||
172 | my ($count, $prefix) = @_; | ||
173 | |||
174 | my @reg_list; | ||
175 | |||
176 | for ($i = 0; $i < $count; $i++) { | ||
177 | my $hw_index = get_hw_index($i); | ||
178 | my $chan= get_chan_str($i); | ||
179 | my $name = "$prefix$hw_index\_$chan"; | ||
180 | print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n}; | ||
181 | $reg_list[$i] = $name; | ||
182 | } | ||
183 | return @reg_list; | ||
184 | } | ||
185 | |||
186 | #Helper functions | ||
187 | sub get_hw_index { | ||
188 | my ($index) = @_; | ||
189 | return int($index / 4); | ||
190 | } | ||
191 | |||
192 | sub get_chan_str { | ||
193 | my ($index) = @_; | ||
194 | my $chan = $index % 4; | ||
195 | if ($chan == 0 ) { | ||
196 | return 'X'; | ||
197 | } elsif ($chan == 1) { | ||
198 | return 'Y'; | ||
199 | } elsif ($chan == 2) { | ||
200 | return 'Z'; | ||
201 | } elsif ($chan == 3) { | ||
202 | return 'W'; | ||
203 | } else { | ||
204 | die("Unknown chan value: $chan"); | ||
205 | } | ||
206 | } | ||
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp deleted file mode 100644 index 5dd2f5334c5..00000000000 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ /dev/null | |||
@@ -1,740 +0,0 @@ | |||
1 | //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file | ||
11 | // is mostly EmitInstrWithCustomInserter(). | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "R600ISelLowering.h" | ||
16 | #include "R600Defines.h" | ||
17 | #include "R600InstrInfo.h" | ||
18 | #include "R600MachineFunctionInfo.h" | ||
19 | #include "llvm/Argument.h" | ||
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
21 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
22 | #include "llvm/CodeGen/SelectionDAG.h" | ||
23 | |||
24 | using namespace llvm; | ||
25 | |||
// R600TargetLowering constructor.
//
// Registers the register classes used by the R600 backend (32-bit scalars and
// 128-bit / 4-element vectors) and configures how each ISD operation is
// legalized: Expand lets the generic legalizer break the operation up,
// Custom routes it to LowerOperation()/ReplaceNodeResults() in this file.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  // Vector float arithmetic is expanded to per-channel scalar operations.
  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  // ROTL is lowered to the BITALIGN node; see LowerROTL().
  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  // (f32 fp_round (f64 uint_to_fp a)) is folded in PerformDAGCombine().
  setTargetDAGCombine(ISD::FP_ROUND);

  setSchedulingPreference(Sched::VLIW);
}
62 | |||
63 | MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( | ||
64 | MachineInstr * MI, MachineBasicBlock * BB) const | ||
65 | { | ||
66 | MachineFunction * MF = BB->getParent(); | ||
67 | MachineRegisterInfo &MRI = MF->getRegInfo(); | ||
68 | MachineBasicBlock::iterator I = *MI; | ||
69 | |||
70 | switch (MI->getOpcode()) { | ||
71 | default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); | ||
72 | case AMDGPU::SHADER_TYPE: break; | ||
73 | case AMDGPU::CLAMP_R600: | ||
74 | { | ||
75 | MachineInstr *NewMI = | ||
76 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) | ||
77 | .addOperand(MI->getOperand(0)) | ||
78 | .addOperand(MI->getOperand(1)) | ||
79 | .addImm(0) // Flags | ||
80 | .addReg(AMDGPU::PRED_SEL_OFF); | ||
81 | TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); | ||
82 | break; | ||
83 | } | ||
84 | case AMDGPU::FABS_R600: | ||
85 | { | ||
86 | MachineInstr *NewMI = | ||
87 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) | ||
88 | .addOperand(MI->getOperand(0)) | ||
89 | .addOperand(MI->getOperand(1)) | ||
90 | .addImm(0) // Flags | ||
91 | .addReg(AMDGPU::PRED_SEL_OFF); | ||
92 | TII->addFlag(NewMI, 1, MO_FLAG_ABS); | ||
93 | break; | ||
94 | } | ||
95 | |||
96 | case AMDGPU::FNEG_R600: | ||
97 | { | ||
98 | MachineInstr *NewMI = | ||
99 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) | ||
100 | .addOperand(MI->getOperand(0)) | ||
101 | .addOperand(MI->getOperand(1)) | ||
102 | .addImm(0) // Flags | ||
103 | .addReg(AMDGPU::PRED_SEL_OFF); | ||
104 | TII->addFlag(NewMI, 1, MO_FLAG_NEG); | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | case AMDGPU::R600_LOAD_CONST: | ||
109 | { | ||
110 | int64_t RegIndex = MI->getOperand(1).getImm(); | ||
111 | unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); | ||
112 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) | ||
113 | .addOperand(MI->getOperand(0)) | ||
114 | .addReg(ConstantReg); | ||
115 | break; | ||
116 | } | ||
117 | |||
118 | case AMDGPU::MASK_WRITE: | ||
119 | { | ||
120 | unsigned maskedRegister = MI->getOperand(0).getReg(); | ||
121 | assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); | ||
122 | MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); | ||
123 | TII->addFlag(defInstr, 0, MO_FLAG_MASK); | ||
124 | // Return early so the instruction is not erased | ||
125 | return BB; | ||
126 | } | ||
127 | |||
128 | case AMDGPU::RAT_WRITE_CACHELESS_32_eg: | ||
129 | case AMDGPU::RAT_WRITE_CACHELESS_128_eg: | ||
130 | { | ||
131 | // Convert to DWORD address | ||
132 | unsigned NewAddr = MRI.createVirtualRegister( | ||
133 | &AMDGPU::R600_TReg32_XRegClass); | ||
134 | unsigned ShiftValue = MRI.createVirtualRegister( | ||
135 | &AMDGPU::R600_TReg32RegClass); | ||
136 | unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; | ||
137 | |||
138 | // XXX In theory, we should be able to pass ShiftValue directly to | ||
139 | // the LSHR_eg instruction as an inline literal, but I tried doing it | ||
140 | // this way and it didn't produce the correct results. | ||
141 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32), | ||
142 | ShiftValue) | ||
143 | .addReg(AMDGPU::ALU_LITERAL_X) | ||
144 | .addReg(AMDGPU::PRED_SEL_OFF) | ||
145 | .addImm(2); | ||
146 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) | ||
147 | .addOperand(MI->getOperand(1)) | ||
148 | .addReg(ShiftValue) | ||
149 | .addReg(AMDGPU::PRED_SEL_OFF); | ||
150 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) | ||
151 | .addOperand(MI->getOperand(0)) | ||
152 | .addReg(NewAddr) | ||
153 | .addImm(EOP); // Set End of program bit | ||
154 | break; | ||
155 | } | ||
156 | |||
157 | case AMDGPU::RESERVE_REG: | ||
158 | { | ||
159 | R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); | ||
160 | int64_t ReservedIndex = MI->getOperand(0).getImm(); | ||
161 | unsigned ReservedReg = | ||
162 | AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); | ||
163 | MFI->ReservedRegs.push_back(ReservedReg); | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | case AMDGPU::TXD: | ||
168 | { | ||
169 | unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | ||
170 | unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | ||
171 | |||
172 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) | ||
173 | .addOperand(MI->getOperand(3)) | ||
174 | .addOperand(MI->getOperand(4)) | ||
175 | .addOperand(MI->getOperand(5)); | ||
176 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) | ||
177 | .addOperand(MI->getOperand(2)) | ||
178 | .addOperand(MI->getOperand(4)) | ||
179 | .addOperand(MI->getOperand(5)); | ||
180 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) | ||
181 | .addOperand(MI->getOperand(0)) | ||
182 | .addOperand(MI->getOperand(1)) | ||
183 | .addOperand(MI->getOperand(4)) | ||
184 | .addOperand(MI->getOperand(5)) | ||
185 | .addReg(t0, RegState::Implicit) | ||
186 | .addReg(t1, RegState::Implicit); | ||
187 | break; | ||
188 | } | ||
189 | case AMDGPU::TXD_SHADOW: | ||
190 | { | ||
191 | unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); | ||
192 | unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); | ||
193 | |||
194 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) | ||
195 | .addOperand(MI->getOperand(3)) | ||
196 | .addOperand(MI->getOperand(4)) | ||
197 | .addOperand(MI->getOperand(5)); | ||
198 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) | ||
199 | .addOperand(MI->getOperand(2)) | ||
200 | .addOperand(MI->getOperand(4)) | ||
201 | .addOperand(MI->getOperand(5)); | ||
202 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) | ||
203 | .addOperand(MI->getOperand(0)) | ||
204 | .addOperand(MI->getOperand(1)) | ||
205 | .addOperand(MI->getOperand(4)) | ||
206 | .addOperand(MI->getOperand(5)) | ||
207 | .addReg(t0, RegState::Implicit) | ||
208 | .addReg(t1, RegState::Implicit); | ||
209 | break; | ||
210 | } | ||
211 | case AMDGPU::BRANCH: | ||
212 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) | ||
213 | .addOperand(MI->getOperand(0)) | ||
214 | .addReg(0); | ||
215 | break; | ||
216 | case AMDGPU::BRANCH_COND_f32: | ||
217 | { | ||
218 | MachineInstr *NewMI = | ||
219 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) | ||
220 | .addReg(AMDGPU::PREDICATE_BIT) | ||
221 | .addOperand(MI->getOperand(1)) | ||
222 | .addImm(OPCODE_IS_NOT_ZERO) | ||
223 | .addImm(0); // Flags | ||
224 | TII->addFlag(NewMI, 1, MO_FLAG_PUSH); | ||
225 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) | ||
226 | .addOperand(MI->getOperand(0)) | ||
227 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); | ||
228 | break; | ||
229 | } | ||
230 | case AMDGPU::BRANCH_COND_i32: | ||
231 | { | ||
232 | MachineInstr *NewMI = | ||
233 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) | ||
234 | .addReg(AMDGPU::PREDICATE_BIT) | ||
235 | .addOperand(MI->getOperand(1)) | ||
236 | .addImm(OPCODE_IS_NOT_ZERO_INT) | ||
237 | .addImm(0); // Flags | ||
238 | TII->addFlag(NewMI, 1, MO_FLAG_PUSH); | ||
239 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) | ||
240 | .addOperand(MI->getOperand(0)) | ||
241 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); | ||
242 | break; | ||
243 | } | ||
244 | case AMDGPU::input_perspective: | ||
245 | { | ||
246 | R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); | ||
247 | |||
248 | // XXX Be more fine about register reservation | ||
249 | for (unsigned i = 0; i < 4; i ++) { | ||
250 | unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); | ||
251 | MFI->ReservedRegs.push_back(ReservedReg); | ||
252 | } | ||
253 | |||
254 | switch (MI->getOperand(1).getImm()) { | ||
255 | case 0:// Perspective | ||
256 | MFI->HasPerspectiveInterpolation = true; | ||
257 | break; | ||
258 | case 1:// Linear | ||
259 | MFI->HasLinearInterpolation = true; | ||
260 | break; | ||
261 | default: | ||
262 | assert(0 && "Unknow ij index"); | ||
263 | } | ||
264 | |||
265 | return BB; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | MI->eraseFromParent(); | ||
270 | return BB; | ||
271 | } | ||
272 | |||
273 | //===----------------------------------------------------------------------===// | ||
274 | // Custom DAG Lowering Operations | ||
275 | //===----------------------------------------------------------------------===// | ||
276 | |||
277 | using namespace llvm::Intrinsic; | ||
278 | using namespace llvm::AMDGPUIntrinsic; | ||
279 | |||
// LowerOperation - Custom-lower the DAG operations marked Custom in the
// constructor; anything not handled here is forwarded to the common AMDGPU
// lowering.  The intrinsic cases lower the R600 shader-input/output and
// compute-grid intrinsics to live-in register reads or parameter loads.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Shader outputs live in fixed T registers: mark the target register
      // live-out and copy the value into it.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are pre-loaded into fixed T registers; read the
      // corresponding register as a live-in.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      // Interpolate a full 4-channel vector (slot / 4 selects the vector,
      // first operand 0 selects perspective mode), then extract slot % 4.
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      // Same as the perspective case but with INTERP mode operand 1 (linear).
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      // Flat (non-interpolated) input: INTERP_P0 takes only the slot vector.
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_position: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
      SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                         RegIndex, MVT::f32);
      // Channel 3 (W) is delivered as 1/W, so invert it before returning.
      if ((slot % 4) == 3) {
        return DAG.getNode(ISD::FDIV,
            DL, VT,
            DAG.getConstantFP(1.0f, MVT::f32),
            Reg);
      } else {
        return Reg;
      }
    }

    // The nine compute-grid implicit parameters are stored as consecutive
    // dwords; see LowerImplicitParameter().
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids arrive in T1, per-thread ids in T0 (one channel each).
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
407 | |||
408 | void R600TargetLowering::ReplaceNodeResults(SDNode *N, | ||
409 | SmallVectorImpl<SDValue> &Results, | ||
410 | SelectionDAG &DAG) const | ||
411 | { | ||
412 | switch (N->getOpcode()) { | ||
413 | default: return; | ||
414 | case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); | ||
415 | case ISD::INTRINSIC_WO_CHAIN: | ||
416 | { | ||
417 | unsigned IntrinsicID = | ||
418 | cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); | ||
419 | if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) { | ||
420 | Results.push_back(LowerInputFace(N, DAG)); | ||
421 | } else { | ||
422 | return; | ||
423 | } | ||
424 | } | ||
425 | } | ||
426 | } | ||
427 | |||
428 | SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const | ||
429 | { | ||
430 | unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); | ||
431 | unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); | ||
432 | SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | ||
433 | RegIndex, MVT::f32); | ||
434 | return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, | ||
435 | Reg, DAG.getConstantFP(0.0f, MVT::f32), | ||
436 | DAG.getCondCode(ISD::SETUGT)); | ||
437 | } | ||
438 | |||
439 | SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const | ||
440 | { | ||
441 | return DAG.getNode( | ||
442 | ISD::SETCC, | ||
443 | Op.getDebugLoc(), | ||
444 | MVT::i1, | ||
445 | Op, DAG.getConstantFP(0.0f, MVT::f32), | ||
446 | DAG.getCondCode(ISD::SETNE) | ||
447 | ); | ||
448 | } | ||
449 | |||
450 | SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const | ||
451 | { | ||
452 | SDValue Chain = Op.getOperand(0); | ||
453 | SDValue CC = Op.getOperand(1); | ||
454 | SDValue LHS = Op.getOperand(2); | ||
455 | SDValue RHS = Op.getOperand(3); | ||
456 | SDValue JumpT = Op.getOperand(4); | ||
457 | SDValue CmpValue; | ||
458 | SDValue Result; | ||
459 | |||
460 | if (LHS.getValueType() == MVT::i32) { | ||
461 | CmpValue = DAG.getNode( | ||
462 | ISD::SELECT_CC, | ||
463 | Op.getDebugLoc(), | ||
464 | MVT::i32, | ||
465 | LHS, RHS, | ||
466 | DAG.getConstant(-1, MVT::i32), | ||
467 | DAG.getConstant(0, MVT::i32), | ||
468 | CC); | ||
469 | } else if (LHS.getValueType() == MVT::f32) { | ||
470 | CmpValue = DAG.getNode( | ||
471 | ISD::SELECT_CC, | ||
472 | Op.getDebugLoc(), | ||
473 | MVT::f32, | ||
474 | LHS, RHS, | ||
475 | DAG.getConstantFP(1.0f, MVT::f32), | ||
476 | DAG.getConstantFP(0.0f, MVT::f32), | ||
477 | CC); | ||
478 | } else { | ||
479 | assert(0 && "Not valid type for br_cc"); | ||
480 | } | ||
481 | Result = DAG.getNode( | ||
482 | AMDGPUISD::BRANCH_COND, | ||
483 | CmpValue.getDebugLoc(), | ||
484 | MVT::Other, Chain, | ||
485 | JumpT, CmpValue); | ||
486 | return Result; | ||
487 | } | ||
488 | |||
489 | SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, | ||
490 | DebugLoc DL, | ||
491 | unsigned DwordOffset) const | ||
492 | { | ||
493 | unsigned ByteOffset = DwordOffset * 4; | ||
494 | PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), | ||
495 | AMDGPUAS::PARAM_I_ADDRESS); | ||
496 | |||
497 | // We shouldn't be using an offset wider than 16-bits for implicit parameters. | ||
498 | assert(isInt<16>(ByteOffset)); | ||
499 | |||
500 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), | ||
501 | DAG.getConstant(ByteOffset, MVT::i32), // PTR | ||
502 | MachinePointerInfo(ConstantPointerNull::get(PtrType)), | ||
503 | false, false, false, 0); | ||
504 | } | ||
505 | |||
506 | SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const | ||
507 | { | ||
508 | DebugLoc DL = Op.getDebugLoc(); | ||
509 | EVT VT = Op.getValueType(); | ||
510 | |||
511 | return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, | ||
512 | Op.getOperand(0), | ||
513 | Op.getOperand(0), | ||
514 | DAG.getNode(ISD::SUB, DL, VT, | ||
515 | DAG.getConstant(32, MVT::i32), | ||
516 | Op.getOperand(1))); | ||
517 | } | ||
518 | |||
519 | bool R600TargetLowering::isZero(SDValue Op) const | ||
520 | { | ||
521 | if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { | ||
522 | return Cst->isNullValue(); | ||
523 | } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ | ||
524 | return CstFP->isZero(); | ||
525 | } else { | ||
526 | return false; | ||
527 | } | ||
528 | } | ||
529 | |||
// LowerSELECT_CC - Lower SELECT_CC into a form the R600 CND* instructions
// can execute.  CND* requires all operands to share one value type and the
// comparison RHS to be zero, so this routine (1) rewrites mixed-type selects
// of hardware true/false values as conversions, (2) bitcasts/swaps operands
// to reach the canonical "compare against zero" shape, and (3) otherwise
// splits the node into two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        // i32 compare selecting f32 true/false: produce the i32 boolean
        // (-1/0) and convert it to float.
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        // f32 compare selecting i32 true/false: produce the float boolean
        // (1.0/0.0) and convert it to integer.
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type parings in SELECT_CC");
      }
    } else {
      // Types already match; the node is legal as-is.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }


  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  // (Intentionally unimplemented — left as a future optimization.)
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // Check if we can lower this to a native operation.
  // CND* instructions requires all operands to have the same type,
  // and RHS to be zero.

  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True/False so all operands share CompareVT; the result is
      // bitcast back at the end.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // Zero must be on the RHS, so swap the comparison direction.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    // CND* only implements the >=/>/== family; rewrite the inverse
    // conditions by inverting the condition code and swapping True/False.
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
648 | |||
649 | SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const | ||
650 | { | ||
651 | SDValue Cond; | ||
652 | SDValue LHS = Op.getOperand(0); | ||
653 | SDValue RHS = Op.getOperand(1); | ||
654 | SDValue CC = Op.getOperand(2); | ||
655 | DebugLoc DL = Op.getDebugLoc(); | ||
656 | assert(Op.getValueType() == MVT::i32); | ||
657 | if (LHS.getValueType() == MVT::i32) { | ||
658 | Cond = DAG.getNode( | ||
659 | ISD::SELECT_CC, | ||
660 | Op.getDebugLoc(), | ||
661 | MVT::i32, | ||
662 | LHS, RHS, | ||
663 | DAG.getConstant(-1, MVT::i32), | ||
664 | DAG.getConstant(0, MVT::i32), | ||
665 | CC); | ||
666 | } else if (LHS.getValueType() == MVT::f32) { | ||
667 | Cond = DAG.getNode( | ||
668 | ISD::SELECT_CC, | ||
669 | Op.getDebugLoc(), | ||
670 | MVT::f32, | ||
671 | LHS, RHS, | ||
672 | DAG.getConstantFP(1.0f, MVT::f32), | ||
673 | DAG.getConstantFP(0.0f, MVT::f32), | ||
674 | CC); | ||
675 | Cond = DAG.getNode( | ||
676 | ISD::FP_TO_SINT, | ||
677 | DL, | ||
678 | MVT::i32, | ||
679 | Cond); | ||
680 | } else { | ||
681 | assert(0 && "Not valid type for set_cc"); | ||
682 | } | ||
683 | Cond = DAG.getNode( | ||
684 | ISD::AND, | ||
685 | DL, | ||
686 | MVT::i32, | ||
687 | DAG.getConstant(1, MVT::i32), | ||
688 | Cond); | ||
689 | return Cond; | ||
690 | } | ||
691 | |||
// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
//
// LowerFormalArguments - Lower each incoming kernel argument to a load from
// the PARAM_I constant address space.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const
{
  // Explicit arguments start at byte 36, past the nine implicit dword
  // parameters (9 * 4 bytes) handled by LowerImplicitParameter().
  unsigned ParamOffsetBytes = 36;
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    EVT VT = Ins[i].VT;
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    // NOTE(review): the Argument is heap-allocated and never freed; it seems
    // to serve only as a distinct pseudo value for MachinePointerInfo —
    // confirm intent before changing.
    SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
                              DAG.getConstant(ParamOffsetBytes, MVT::i32),
                              MachinePointerInfo(new Argument(PtrTy)),
                              false, false, false, 4);
    InVals.push_back(Arg);
    // Arguments are packed back-to-back at their store sizes.
    ParamOffsetBytes += (VT.getStoreSize());
  }
  return Chain;
}
718 | |||
719 | //===----------------------------------------------------------------------===// | ||
720 | // Custom DAG Optimizations | ||
721 | //===----------------------------------------------------------------------===// | ||
722 | |||
723 | SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, | ||
724 | DAGCombinerInfo &DCI) const | ||
725 | { | ||
726 | SelectionDAG &DAG = DCI.DAG; | ||
727 | |||
728 | switch (N->getOpcode()) { | ||
729 | // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) | ||
730 | case ISD::FP_ROUND: { | ||
731 | SDValue Arg = N->getOperand(0); | ||
732 | if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { | ||
733 | return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), | ||
734 | Arg.getOperand(0)); | ||
735 | } | ||
736 | break; | ||
737 | } | ||
738 | } | ||
739 | return SDValue(); | ||
740 | } | ||
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h deleted file mode 100644 index 7df2dd13787..00000000000 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ /dev/null | |||
@@ -1,69 +0,0 @@ | |||
1 | //===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600 DAG Lowering interface definition | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef R600ISELLOWERING_H | ||
15 | #define R600ISELLOWERING_H | ||
16 | |||
17 | #include "AMDGPUISelLowering.h" | ||
18 | |||
19 | namespace llvm { | ||
20 | |||
21 | class R600InstrInfo; | ||
22 | |||
/// R600TargetLowering - R600-specific DAG lowering.  Most lowering is shared
/// in AMDGPUTargetLowering; this class handles the R600 pseudo-instruction
/// expansion and the Custom-marked ISD operations.
class R600TargetLowering : public AMDGPUTargetLowering
{
public:
  R600TargetLowering(TargetMachine &TM);
  /// Expand R600 pseudo instructions (CLAMP/FABS/FNEG/TXD/branches/...).
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock * BB) const;
  /// Lower the operations registered as Custom in the constructor.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  /// Fold (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a).
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  /// Replace illegal-typed results (i1 FP_TO_UINT, load_input_face).
  void ReplaceNodeResults(SDNode * N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  /// Lower kernel arguments to loads from the PARAM_I address space.
  virtual SDValue LowerFormalArguments(
      SDValue Chain,
      CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      DebugLoc DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const;
private:
  const R600InstrInfo * TII;

  /// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
  /// that are stored in the first nine dwords of a Vertex Buffer.  These
  /// implicit parameters are lowered to load instructions which retrieve the
  /// values from the Vertex Buffer.
  SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                 DebugLoc DL, unsigned DwordOffset) const;

  void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
      MachineRegisterInfo & MRI, unsigned dword_offset) const;

  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;

  /// LowerROTL - Lower ROTL opcode to BITALIGN
  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
  SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;

  /// isZero - True if Op is a constant integer or float zero.
  bool isZero(SDValue Op) const;
};
66 | |||
67 | } // End namespace llvm; | ||
68 | |||
69 | #endif // R600ISELLOWERING_H | ||
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp deleted file mode 100644 index e990dd9370b..00000000000 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ /dev/null | |||
@@ -1,512 +0,0 @@ | |||
1 | //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600 Implementation of TargetInstrInfo. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "R600InstrInfo.h" | ||
15 | #include "AMDGPUTargetMachine.h" | ||
16 | #include "AMDGPUSubtarget.h" | ||
17 | #include "R600Defines.h" | ||
18 | #include "R600RegisterInfo.h" | ||
19 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
20 | #include "AMDILUtilityFunctions.h" | ||
21 | |||
22 | #define GET_INSTRINFO_CTOR | ||
23 | #include "AMDGPUGenDFAPacketizer.inc" | ||
24 | |||
25 | using namespace llvm; | ||
26 | |||
// Construct the R600 instruction-info object.  The register-info member is
// initialized with this TargetInstrInfo, and a reference to the target
// machine is kept for later subtarget queries.
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }
32 | |||
// getRegisterInfo - Accessor for the target register-info object owned by
// this TargetInstrInfo.
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}

// isTrig - True if MI is a trigonometric instruction, i.e. the TRIG bit is
// set in its TableGen-generated TSFlags.
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

// isVector - True if MI must fill all instruction slots within an
// instruction group (VECTOR bit in TSFlags).
bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}
47 | |||
// copyPhysReg - Emit a physical register copy before MI.  A 128-bit (vec4)
// register cannot be copied with a single instruction, so it is expanded
// into four per-channel 32-bit MOVs; every other copy is one MOV.
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    for (unsigned I = 0; I < 4; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
              .addImm(0) // Flag
              .addReg(0) // PREDICATE_BIT
              // Also mark the full 128-bit register as implicitly defined so
              // the copy as a whole is visible to liveness analysis.
              .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {

    /* We can't copy vec4 registers */
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0) // Flag
      .addReg(0); // PREDICATE_BIT
  }
}
77 | |||
78 | MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, | ||
79 | unsigned DstReg, int64_t Imm) const | ||
80 | { | ||
81 | MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); | ||
82 | MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); | ||
83 | MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); | ||
84 | MachineInstrBuilder(MI).addImm(Imm); | ||
85 | MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT | ||
86 | |||
87 | return MI; | ||
88 | } | ||
89 | |||
// getIEQOpcode - Opcode used for integer equality comparisons on R600.
unsigned R600InstrInfo::getIEQOpcode() const
{
  return AMDGPU::SETE_INT;
}

// isMov - True if Opcode is one of the R600 move forms (register move or
// the float/int immediate-move pseudos).
bool R600InstrInfo::isMov(unsigned Opcode) const
{
  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}
107 | |||
// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

// isReductionOp - True for reduction opcodes (the DOT4 variants for the
// r600 and evergreen generations).
bool R600InstrInfo::isReductionOp(unsigned Opcode) const
{
  switch(Opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

// isCubeOp - True for the CUBE opcodes (pseudo and real forms, r600 and
// evergreen generations).
bool R600InstrInfo::isCubeOp(unsigned Opcode) const
{
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}
143 | |||
// CreateTargetScheduleState - Build a DFA packetizer for VLIW scheduling,
// using the subtarget's instruction itinerary data.
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
{
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

// isPredicateSetter - True if Opcode writes the predicate bit (only PRED_X
// does on this target).
static bool
isPredicateSetter(unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

// findFirstPredicateSetterFrom - Scan backwards from I (exclusive) to the
// start of MBB and return the nearest predicate-setting instruction, or
// NULL if there is none.
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}
175 | |||
// AnalyzeBranch - Inspect the terminators of MBB and, when they match a
// pattern this backend understands, fill in TBB/FBB/Cond.  Following the
// TargetInstrInfo convention the function returns false on a successful
// analysis and true when the branch structure cannot be handled.
// Cond is populated as: [PRED_X operand 1, PRED_X operand 2, PRED_SEL_ONE].
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; they are not real terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // Every branch on this target is a (possibly predicated) JUMP; anything
  // else means the block simply falls through.
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Unconditional branch: only a target block.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: walk back to the PRED_X that feeds it and
        // record its comparison operands in Cond.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    // Conditional branch to TBB, unconditional fall-back branch to FBB.
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
249 | |||
// getBranchInstr - Pick the conditional-branch opcode matching the register
// class of the condition operand (int vs. float compare result).
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??  Unknown classes are treated as i32.
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}
259 | |||
// InsertBranch - Insert the branch(es) described by TBB/FBB/Cond (as filled
// in by AnalyzeBranch) at the end of MBB.  Returns the number of JUMP
// instructions inserted (1 or 2).  For conditional branches, the nearest
// preceding PRED_X is flagged with MO_FLAG_PUSH and its comparison operand
// is updated from Cond[1] before the predicated JUMP is emitted.
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch to TBB.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Conditional branch with fallthrough on the false edge.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 1, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Two-way branch: predicated JUMP to TBB, unconditional JUMP to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 1, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
296 | |||
// RemoveBranch - Delete up to two JUMP terminators from the end of MBB and
// return how many were removed.  For each predicated JUMP removed, the
// MO_FLAG_PUSH flag set by InsertBranch is cleared on its PRED_X.
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  // Look for a second branch (the conditional part of a two-way branch).
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}
341 | |||
// isPredicated - True if MI carries a live predicate operand, i.e. its
// first predicate operand names one of the predicate-select registers.
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

// isPredicable - Defer to the generic AMDGPU predicability test.
bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}
364 | |||
365 | |||
// If-conversion profitability hooks.  Predication is essentially free on
// this target, so single-block and diamond if-conversion are always
// reported as profitable; duplication for if-conversion likewise.  Only
// unpredication is reported as unprofitable.

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const{
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const
{
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const
{
  return false;
}
401 | |||
402 | |||
// ReverseBranchCondition - Invert a condition produced by AnalyzeBranch in
// place: the comparison opcode immediate (Cond[1]) is swapped with its
// negation and the predicate-select register (Cond[2]) is flipped between
// PRED_SEL_ZERO and PRED_SEL_ONE.  Returns false on success, true if the
// condition is not one of the recognized forms.
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}
437 | |||
// DefinesPredicate - True if MI writes the predicate (i.e. is a PRED_X).
// Note: Pred is not populated by this implementation.
bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const
{
  return isPredicateSetter(MI->getOpcode());
}

// SubsumesPredicate - Conservatively report that no predicate subsumes
// another on this target.
bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const
{
  return false;
}

// PredicateInstruction - Predicate MI with Pred (the Cond vector built by
// AnalyzeBranch): the instruction's predicate operand is set to the
// predicate-select register in Pred[2] and an implicit use of
// PREDICATE_BIT is appended.  Returns true on success.
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const
{
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

// getInstrLatency - All R600 instructions are modeled with a fixed latency
// of 2 cycles (including predicated ones).
int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const
{
  if (PredCost)
    *PredCost = 2;
  return 2;
}
478 | |||
//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

// hasFlagOperand - True if this instruction reserves an operand for target
// flags (FlagOperandIdx in TSFlags is non-zero).
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
{
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

// getFlagOp - Return the immediate operand that stores this instruction's
// flags; asserts if the instruction has no flag operand.
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
{
  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
  assert(FlagIndex != 0 &&
         "Instruction flags not supported for this instruction");
  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

// addFlag - Set a MO_FLAG* bit for the given source operand; each operand
// owns a NUM_MO_FLAGS-wide slice of the flag immediate.
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}

// clearFlag - Clear a MO_FLAG* bit for the given source operand.
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  unsigned InstFlags = FlagOp.getImm();
  InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
  FlagOp.setImm(InstFlags);
}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h deleted file mode 100644 index de82542fa2c..00000000000 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ /dev/null | |||
@@ -1,132 +0,0 @@ | |||
1 | //===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface definition for R600InstrInfo | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef R600INSTRUCTIONINFO_H_ | ||
15 | #define R600INSTRUCTIONINFO_H_ | ||
16 | |||
17 | #include "AMDIL.h" | ||
18 | #include "AMDGPUInstrInfo.h" | ||
19 | #include "R600RegisterInfo.h" | ||
20 | |||
21 | #include <map> | ||
22 | |||
23 | namespace llvm { | ||
24 | |||
25 | class AMDGPUTargetMachine; | ||
26 | class DFAPacketizer; | ||
27 | class ScheduleDAG; | ||
28 | class MachineFunction; | ||
29 | class MachineInstr; | ||
30 | class MachineInstrBuilder; | ||
31 | |||
// R600InstrInfo - R600 specialization of the AMDGPU instruction-info
// interface: branch analysis/insertion/removal, predication support, and
// access to the per-instruction flag operand.
class R600InstrInfo : public AMDGPUInstrInfo {
private:
  const R600RegisterInfo RI;   // Target register info.
  AMDGPUTargetMachine &TM;     // Owning target machine.

  // getBranchInstr - Select the branch opcode matching op's register class.
  int getBranchInstr(const MachineOperand &op) const;

public:
  explicit R600InstrInfo(AMDGPUTargetMachine &tm);

  const R600RegisterInfo &getRegisterInfo() const;
  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const;

  // isTrig - True if MI is trigonometric (TRIG bit in TSFlags).
  bool isTrig(const MachineInstr &MI) const;
  // isPlaceHolderOpcode - True for opcodes that merely emulate operations
  // the hardware performs automatically.
  bool isPlaceHolderOpcode(unsigned opcode) const;
  bool isReductionOp(unsigned opcode) const;
  bool isCubeOp(unsigned opcode) const;

  /// isVector - Vector instructions are instructions that must fill all
  /// instruction slots within an instruction group.
  bool isVector(const MachineInstr &MI) const;

  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                        int64_t Imm) const;

  virtual unsigned getIEQOpcode() const;
  virtual bool isMov(unsigned Opcode) const;

  DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                           const ScheduleDAG *DAG) const;

  // Branch analysis hooks (TargetInstrInfo interface).
  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;

  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;

  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;

  unsigned RemoveBranch(MachineBasicBlock &MBB) const;

  // Predication hooks (TargetInstrInfo interface).
  bool isPredicated(const MachineInstr *MI) const;

  bool isPredicable(MachineInstr *MI) const;

  bool
   isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
                             const BranchProbability &Probability) const;

  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
                           unsigned ExtraPredCycles,
                           const BranchProbability &Probability) const ;

  bool
   isProfitableToIfCvt(MachineBasicBlock &TMBB,
                       unsigned NumTCycles, unsigned ExtraTCycles,
                       MachineBasicBlock &FMBB,
                       unsigned NumFCycles, unsigned ExtraFCycles,
                       const BranchProbability &Probability) const;

  bool DefinesPredicate(MachineInstr *MI,
                        std::vector<MachineOperand> &Pred) const;

  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                         const SmallVectorImpl<MachineOperand> &Pred2) const;

  bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                 MachineBasicBlock &FMBB) const;

  bool PredicateInstruction(MachineInstr *MI,
                            const SmallVectorImpl<MachineOperand> &Pred) const;

  int getInstrLatency(const InstrItineraryData *ItinData,
                      const MachineInstr *MI,
                      unsigned *PredCost = 0) const;

  virtual int getInstrLatency(const InstrItineraryData *ItinData,
                              SDNode *Node) const { return 1;}

  ///hasFlagOperand - Returns true if this instruction has an operand for
  /// storing target flags.
  bool hasFlagOperand(const MachineInstr &MI) const;

  ///addFlag - Add one of the MO_FLAG* flags to the specified Operand.
  void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;

  ///isFlagSet - Determine if the specified flag is set on this Operand.
  bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;

  ///getFlagOp - Return the operand containing the flags for this instruction.
  MachineOperand &getFlagOp(MachineInstr *MI) const;

  ///clearFlag - Clear the specified flag on the instruction.
  void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
129 | |||
130 | } // End llvm namespace | ||
131 | |||
132 | #endif // R600INSTRINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td deleted file mode 100644 index 120a71c5b9e..00000000000 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ /dev/null | |||
@@ -1,1458 +0,0 @@ | |||
1 | //===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600 Tablegen instruction definitions | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
include "R600Intrinsics.td"

// InstR600 - Base format for native R600 instructions.  Carries the 11-bit
// opcode plus the Trig/Op3/isVector/FlagOperandIdx bits, which are exported
// through TSFlags for use by R600InstrInfo.
class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  field bits<64> Inst;
  bit Trig = 0;               // Trigonometric instruction (TSFlags{4}).
  bit Op3 = 0;                // Three-source OP3 encoding (TSFlags{5}).
  bit isVector = 0;           // Must fill all slots in a group (TSFlags{6}).
  bits<2> FlagOperandIdx = 0; // Index of the flag operand; 0 = none.

  bits<11> op_code = inst;
  //let Inst = inst;
  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = itin;

  let TSFlags{4} = Trig;
  let TSFlags{5} = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6} = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
}

// InstR600ISA - Format for non-ALU ISA instructions (e.g. control flow)
// that only need the raw 64-bit encoding field.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    AMDGPUInst <outs, ins, asm, pattern>
{
  field bits<64> Inst;

  let Namespace = "AMDGPU";
}

// Memory operand forms: base pointer plus immediate or register index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
}

def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Address-selection complex patterns handled in the ISel DAG matcher.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;

// R600_ALU - Common encoding fields shared by ALU instructions.
class R600_ALU {

  bits<7> DST_GPR = 0;
  bits<9> SRC0_SEL = 0;
  bits<1> SRC0_NEG = 0;
  bits<9> SRC1_SEL = 0;
  bits<1> SRC1_NEG = 0;
  bits<1> CLAMP = 0;

}

// R600_Pred - Predicate operand; defaults to PRED_SEL_OFF (unpredicated).
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                     (ops PRED_SEL_OFF)>;
77 | |||
78 | |||
// R600_1OP - One-source ALU instruction: dst = op(src), with a predicate
// operand.  Encoding places src in Inst{8-0}, the opcode in Inst{49-39} and
// dst in Inst{59-53}.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src ($p)"),
          pattern,
          itin>{
  bits<7> dst;
  bits<9> src;
  let Inst{8-0}   = src;
  let Inst{49-39} = inst;
  let Inst{59-53} = dst;
}

// R600_2OP - Two-source ALU instruction; src1 is encoded in Inst{21-13}.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1"),
          pattern,
          itin>{
  bits<7> dst;
  bits<9> src0;
  bits<9> src1;
  let Inst{8-0}   = src0;
  let Inst{21-13} = src1;
  let Inst{49-39} = inst;
  let Inst{59-53} = dst;
}

// R600_3OP - Three-source (OP3) ALU instruction; only the low 5 opcode bits
// are encoded and the Op3 TSFlag is set.
class R600_3OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1, $src2"),
          pattern,
          itin>{
  bits<7> dst;
  bits<9> src0;
  bits<9> src1;
  bits<9> src2;
  let Inst{8-0}   = src0;
  let Inst{21-13} = src1;
  let Inst{40-32} = src2;
  let Inst{49-45} = inst{4-0};
  let Inst{59-53} = dst;
  let Op3 = 1;
}



// PRED_X - Predicate-setting compare: writes the predicate bit from a
// comparison of src0; operand 3 ($flags) is the flag operand.
def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
                           (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
                           "PRED $dst, $src0, $src1",
                           [], NullALU>
{
  bits<7> dst;
  bits<9> src0;
  bits<11> src1;
  let Inst{8-0}   = src0;
  let Inst{49-39} = src1;
  let Inst{59-53} = dst;
  let FlagOperandIdx = 3;
}

// JUMP - Pseudo branch terminator, optionally predicated via $p; lowered by
// the control-flow passes.
let isTerminator = 1, isBranch = 1, isPseudo = 1 in {
def JUMP : InstR600 <0x10,
          (outs),
          (ins brtarget:$target, R600_Pred:$p),
          "JUMP $target ($p)",
          [], AnyALU
  >;
}
155 | |||
// R600_REDUCTION - Reduction instruction format (e.g. DOT4): vector inputs
// reduced to a single 32-bit result, scheduled on the vector ALU.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          ins,
          asm,
          pattern,
          itin>{
  bits<7> dst;
  let Inst{49-39} = inst;
  let Inst{59-53} = dst;
}

// R600_TEX - Texture instruction format: 128-bit coordinate register in,
// 128-bit result out; $src1/$src2 are immediate resource/sampler ids
// (assumed — confirm against the lowering code).
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg128:$dst),
          (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
          !strconcat(opName, "$dst, $src0, $src1, $src2"),
          pattern,
          itin>{
  let Inst {10-0} = inst;
}

// TEX_SHADOW - Matches texture-type immediates that denote shadow samplers.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
  }]
>;

// EG_CF_RAT - Evergreen control-flow RAT (Random Access Target) export
// format; encodes CF_ALLOC_EXPORT_WORD0_RAT / WORD1_BUF fields.
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>
{
  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}

// load_param - Matches loads from the PARAM_I address space (kernel
// parameters / shader inputs).
def load_param : PatFrag<(ops node:$ptr),
                         (load node:$ptr),
                          [{
 const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
 if (Src) {
    PointerType * PT = dyn_cast<PointerType>(Src->getType());
    return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
 }
 return false;
}]>;
239 | |||
// Subtarget predicates: gate instruction selection on the GPU generation
// (HD4XXX=r600, HD5XXX=evergreen, HD6XXX=northern islands) and device flag.
def isR600 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
def isR700 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                            "Subtarget.device()->getDeviceFlag()"
                            ">= OCL_DEVICE_RV710">;
def isEG : Predicate<
  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;

def isCayman : Predicate<"Subtarget.device()"
                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
def isEGorCayman : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
                            "|| Subtarget.device()->getGeneration() =="
                            "AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
                     "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;

//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

// INTERP - Custom SDNode: interpolated (perspective) input; float result
// from two integer parameters.
def INTERP: SDNode<"AMDGPUISD::INTERP",
    SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
    >;

// INTERP_P0 - Custom SDNode: constant (flat, P0) input.
def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
    SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
    >;

// input_perspective - Pseudo expanded by a custom inserter into the real
// INTERP_XY/INTERP_ZW pairs.
let usesCustomInserter = 1 in {
def input_perspective :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0, i32imm:$src1),
  "input_perspective $src0 $src1 : dst",
  [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
}  // End usesCustomInserter = 1

// input_constant - Pseudo for flat (non-interpolated) inputs via INTERP_P0.
def input_constant :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src),
  "input_perspective $src : dst",
  [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;



// INTERP_XY / INTERP_ZW - Hardware interpolation instructions; operand 3
// ($flags) is the flag operand.
def INTERP_XY : InstR600 <0xD6,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
  "INTERP_XY dst",
  [], AnyALU>
{
  let FlagOperandIdx = 3;
}

def INTERP_ZW : InstR600 <0xD7,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
  "INTERP_ZW dst",
  [], AnyALU>
{
  let FlagOperandIdx = 3;
}
306 | |||
307 | def INTERP_LOAD_P0 : InstR600 <0xE0, | ||
308 | (outs R600_Reg32:$dst), | ||
309 | (ins R600_Reg32:$src, i32imm:$flags), | ||
310 | "INTERP_LOAD_P0 dst", | ||
311 | [], AnyALU> | ||
312 | { | ||
313 | let FlagOperandIdx = 2; | ||
314 | } | ||
315 | |||
316 | let Predicates = [isR600toCayman] in { | ||
317 | |||
318 | //===----------------------------------------------------------------------===// | ||
319 | // Common Instructions R600, R700, Evergreen, Cayman | ||
320 | //===----------------------------------------------------------------------===// | ||
321 | |||
322 | def ADD : R600_2OP < | ||
323 | 0x0, "ADD", | ||
324 | [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))] | ||
325 | >; | ||
326 | |||
327 | // Non-IEEE MUL: 0 * anything = 0 | ||
328 | def MUL : R600_2OP < | ||
329 | 0x1, "MUL NON-IEEE", | ||
330 | [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))] | ||
331 | >; | ||
332 | |||
333 | def MUL_IEEE : R600_2OP < | ||
334 | 0x2, "MUL_IEEE", | ||
335 | [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))] | ||
336 | >; | ||
337 | |||
338 | def MAX : R600_2OP < | ||
339 | 0x3, "MAX", | ||
340 | [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))] | ||
341 | >; | ||
342 | |||
343 | def MIN : R600_2OP < | ||
344 | 0x4, "MIN", | ||
345 | [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))] | ||
346 | >; | ||
347 | |||
348 | // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, | ||
349 | // so some of the instruction names don't match the asm string. | ||
350 | // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. | ||
351 | |||
352 | def SETE : R600_2OP < | ||
353 | 0x08, "SETE", | ||
354 | [(set R600_Reg32:$dst, | ||
355 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, | ||
356 | COND_EQ))] | ||
357 | >; | ||
358 | |||
359 | def SGT : R600_2OP < | ||
360 | 0x09, "SETGT", | ||
361 | [(set R600_Reg32:$dst, | ||
362 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, | ||
363 | COND_GT))] | ||
364 | >; | ||
365 | |||
366 | def SGE : R600_2OP < | ||
367 | 0xA, "SETGE", | ||
368 | [(set R600_Reg32:$dst, | ||
369 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, | ||
370 | COND_GE))] | ||
371 | >; | ||
372 | |||
373 | def SNE : R600_2OP < | ||
374 | 0xB, "SETNE", | ||
375 | [(set R600_Reg32:$dst, | ||
376 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, | ||
377 | COND_NE))] | ||
378 | >; | ||
379 | |||
380 | def FRACT : R600_1OP < | ||
381 | 0x10, "FRACT", | ||
382 | [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))] | ||
383 | >; | ||
384 | |||
385 | def TRUNC : R600_1OP < | ||
386 | 0x11, "TRUNC", | ||
387 | [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] | ||
388 | >; | ||
389 | |||
390 | def CEIL : R600_1OP < | ||
391 | 0x12, "CEIL", | ||
392 | [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))] | ||
393 | >; | ||
394 | |||
395 | def RNDNE : R600_1OP < | ||
396 | 0x13, "RNDNE", | ||
397 | [(set R600_Reg32:$dst, (frint R600_Reg32:$src))] | ||
398 | >; | ||
399 | |||
400 | def FLOOR : R600_1OP < | ||
401 | 0x14, "FLOOR", | ||
402 | [(set R600_Reg32:$dst, (ffloor R600_Reg32:$src))] | ||
403 | >; | ||
404 | |||
405 | def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), | ||
406 | (ins R600_Reg32:$src0, i32imm:$flags, | ||
407 | R600_Pred:$p), | ||
408 | "MOV $dst, $src0", [], AnyALU> { | ||
409 | let FlagOperandIdx = 2; | ||
410 | bits<7> dst; | ||
411 | bits<9> src0; | ||
412 | let Inst{8-0} = src0; | ||
413 | let Inst{49-39} = op_code; | ||
414 | let Inst{59-53} = dst; | ||
415 | } | ||
416 | |||
417 | class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, | ||
418 | (outs R600_Reg32:$dst), | ||
419 | (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), | ||
420 | "MOV_IMM $dst, $imm", | ||
421 | [], AnyALU | ||
422 | >{ | ||
423 | bits<7> dst; | ||
424 | bits<9> alu_literal; | ||
425 | bits<9> p; | ||
426 | let Inst{8-0} = alu_literal; | ||
427 | let Inst{21-13} = p; | ||
428 | let Inst{49-39} = op_code; | ||
429 | let Inst{59-53} = dst; | ||
430 | } | ||
431 | |||
432 | def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; | ||
433 | def : Pat < | ||
434 | (imm:$val), | ||
435 | (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val) | ||
436 | >; | ||
437 | |||
438 | def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; | ||
439 | def : Pat < | ||
440 | (fpimm:$val), | ||
441 | (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val) | ||
442 | >; | ||
443 | |||
444 | def KILLGT : InstR600 <0x2D, | ||
445 | (outs R600_Reg32:$dst), | ||
446 | (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p, | ||
447 | variable_ops), | ||
448 | "KILLGT $dst, $src0, $src1, $flags ($p)", | ||
449 | [], | ||
450 | NullALU>{ | ||
451 | let FlagOperandIdx = 3; | ||
452 | bits<7> dst; | ||
453 | bits<9> src0; | ||
454 | bits<9> src1; | ||
455 | let Inst{8-0} = src0; | ||
456 | let Inst{21-13} = src1; | ||
457 | let Inst{49-39} = op_code; | ||
458 | let Inst{59-53} = dst; | ||
459 | } | ||
460 | |||
461 | def AND_INT : R600_2OP < | ||
462 | 0x30, "AND_INT", | ||
463 | [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))] | ||
464 | >; | ||
465 | |||
466 | def OR_INT : R600_2OP < | ||
467 | 0x31, "OR_INT", | ||
468 | [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))] | ||
469 | >; | ||
470 | |||
471 | def XOR_INT : R600_2OP < | ||
472 | 0x32, "XOR_INT", | ||
473 | [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))] | ||
474 | >; | ||
475 | |||
476 | def NOT_INT : R600_1OP < | ||
477 | 0x33, "NOT_INT", | ||
478 | [(set R600_Reg32:$dst, (not R600_Reg32:$src))] | ||
479 | >; | ||
480 | |||
481 | def ADD_INT : R600_2OP < | ||
482 | 0x34, "ADD_INT", | ||
483 | [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))] | ||
484 | >; | ||
485 | |||
486 | def SUB_INT : R600_2OP < | ||
487 | 0x35, "SUB_INT", | ||
488 | [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))] | ||
489 | >; | ||
490 | |||
491 | def MAX_INT : R600_2OP < | ||
492 | 0x36, "MAX_INT", | ||
493 | [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>; | ||
494 | |||
495 | def MIN_INT : R600_2OP < | ||
496 | 0x37, "MIN_INT", | ||
497 | [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>; | ||
498 | |||
499 | def MAX_UINT : R600_2OP < | ||
500 | 0x38, "MAX_UINT", | ||
501 | [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))] | ||
502 | >; | ||
503 | |||
504 | def MIN_UINT : R600_2OP < | ||
505 | 0x39, "MIN_UINT", | ||
506 | [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))] | ||
507 | >; | ||
508 | |||
509 | def SETE_INT : R600_2OP < | ||
510 | 0x3A, "SETE_INT", | ||
511 | [(set (i32 R600_Reg32:$dst), | ||
512 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] | ||
513 | >; | ||
514 | |||
515 | def SETGT_INT : R600_2OP < | ||
516 | 0x3B, "SGT_INT", | ||
517 | [(set (i32 R600_Reg32:$dst), | ||
518 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] | ||
519 | >; | ||
520 | |||
521 | def SETGE_INT : R600_2OP < | ||
522 | 0x3C, "SETGE_INT", | ||
523 | [(set (i32 R600_Reg32:$dst), | ||
524 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] | ||
525 | >; | ||
526 | |||
527 | def SETNE_INT : R600_2OP < | ||
528 | 0x3D, "SETNE_INT", | ||
529 | [(set (i32 R600_Reg32:$dst), | ||
530 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] | ||
531 | >; | ||
532 | |||
533 | def SETGT_UINT : R600_2OP < | ||
534 | 0x3E, "SETGT_UINT", | ||
535 | [(set (i32 R600_Reg32:$dst), | ||
536 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] | ||
537 | >; | ||
538 | |||
539 | def SETGE_UINT : R600_2OP < | ||
540 | 0x3F, "SETGE_UINT", | ||
541 | [(set (i32 R600_Reg32:$dst), | ||
542 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] | ||
543 | >; | ||
544 | |||
545 | def CNDE_INT : R600_3OP < | ||
546 | 0x1C, "CNDE_INT", | ||
547 | [(set (i32 R600_Reg32:$dst), | ||
548 | (selectcc (i32 R600_Reg32:$src0), 0, | ||
549 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | ||
550 | COND_EQ))] | ||
551 | >; | ||
552 | |||
553 | def CNDGE_INT : R600_3OP < | ||
554 | 0x1E, "CNDGE_INT", | ||
555 | [(set (i32 R600_Reg32:$dst), | ||
556 | (selectcc (i32 R600_Reg32:$src0), 0, | ||
557 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | ||
558 | COND_GE))] | ||
559 | >; | ||
560 | |||
561 | def CNDGT_INT : R600_3OP < | ||
562 | 0x1D, "CNDGT_INT", | ||
563 | [(set (i32 R600_Reg32:$dst), | ||
564 | (selectcc (i32 R600_Reg32:$src0), 0, | ||
565 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | ||
566 | COND_GT))] | ||
567 | >; | ||
568 | |||
569 | //===----------------------------------------------------------------------===// | ||
570 | // Texture instructions | ||
571 | //===----------------------------------------------------------------------===// | ||
572 | |||
573 | def TEX_LD : R600_TEX < | ||
574 | 0x03, "TEX_LD", | ||
575 | [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))] | ||
576 | > { | ||
577 | let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5"; | ||
578 | let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5); | ||
579 | } | ||
580 | |||
581 | def TEX_GET_TEXTURE_RESINFO : R600_TEX < | ||
582 | 0x04, "TEX_GET_TEXTURE_RESINFO", | ||
583 | [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
584 | >; | ||
585 | |||
586 | def TEX_GET_GRADIENTS_H : R600_TEX < | ||
587 | 0x07, "TEX_GET_GRADIENTS_H", | ||
588 | [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
589 | >; | ||
590 | |||
591 | def TEX_GET_GRADIENTS_V : R600_TEX < | ||
592 | 0x08, "TEX_GET_GRADIENTS_V", | ||
593 | [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
594 | >; | ||
595 | |||
596 | def TEX_SET_GRADIENTS_H : R600_TEX < | ||
597 | 0x0B, "TEX_SET_GRADIENTS_H", | ||
598 | [] | ||
599 | >; | ||
600 | |||
601 | def TEX_SET_GRADIENTS_V : R600_TEX < | ||
602 | 0x0C, "TEX_SET_GRADIENTS_V", | ||
603 | [] | ||
604 | >; | ||
605 | |||
606 | def TEX_SAMPLE : R600_TEX < | ||
607 | 0x10, "TEX_SAMPLE", | ||
608 | [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
609 | >; | ||
610 | |||
611 | def TEX_SAMPLE_C : R600_TEX < | ||
612 | 0x18, "TEX_SAMPLE_C", | ||
613 | [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] | ||
614 | >; | ||
615 | |||
616 | def TEX_SAMPLE_L : R600_TEX < | ||
617 | 0x11, "TEX_SAMPLE_L", | ||
618 | [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
619 | >; | ||
620 | |||
621 | def TEX_SAMPLE_C_L : R600_TEX < | ||
622 | 0x19, "TEX_SAMPLE_C_L", | ||
623 | [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] | ||
624 | >; | ||
625 | |||
626 | def TEX_SAMPLE_LB : R600_TEX < | ||
627 | 0x12, "TEX_SAMPLE_LB", | ||
628 | [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))] | ||
629 | >; | ||
630 | |||
631 | def TEX_SAMPLE_C_LB : R600_TEX < | ||
632 | 0x1A, "TEX_SAMPLE_C_LB", | ||
633 | [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] | ||
634 | >; | ||
635 | |||
636 | def TEX_SAMPLE_G : R600_TEX < | ||
637 | 0x14, "TEX_SAMPLE_G", | ||
638 | [] | ||
639 | >; | ||
640 | |||
641 | def TEX_SAMPLE_C_G : R600_TEX < | ||
642 | 0x1C, "TEX_SAMPLE_C_G", | ||
643 | [] | ||
644 | >; | ||
645 | |||
646 | //===----------------------------------------------------------------------===// | ||
647 | // Helper classes for common instructions | ||
648 | //===----------------------------------------------------------------------===// | ||
649 | |||
650 | class MUL_LIT_Common <bits<11> inst> : R600_3OP < | ||
651 | inst, "MUL_LIT", | ||
652 | [] | ||
653 | >; | ||
654 | |||
655 | class MULADD_Common <bits<11> inst> : R600_3OP < | ||
656 | inst, "MULADD", | ||
657 | [(set (f32 R600_Reg32:$dst), | ||
658 | (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] | ||
659 | >; | ||
660 | |||
661 | class CNDE_Common <bits<11> inst> : R600_3OP < | ||
662 | inst, "CNDE", | ||
663 | [(set R600_Reg32:$dst, | ||
664 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | ||
665 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | ||
666 | COND_EQ))] | ||
667 | >; | ||
668 | |||
669 | class CNDGT_Common <bits<11> inst> : R600_3OP < | ||
670 | inst, "CNDGT", | ||
671 | [(set R600_Reg32:$dst, | ||
672 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | ||
673 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | ||
674 | COND_GT))] | ||
675 | >; | ||
676 | |||
677 | class CNDGE_Common <bits<11> inst> : R600_3OP < | ||
678 | inst, "CNDGE", | ||
679 | [(set R600_Reg32:$dst, | ||
680 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | ||
681 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | ||
682 | COND_GE))] | ||
683 | >; | ||
684 | |||
685 | class DOT4_Common <bits<11> inst> : R600_REDUCTION < | ||
686 | inst, | ||
687 | (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), | ||
688 | "DOT4 $dst $src0, $src1", | ||
689 | [] | ||
690 | > { | ||
691 | bits<9> src0; | ||
692 | bits<9> src1; | ||
693 | let Inst{8-0} = src0; | ||
694 | let Inst{21-13} = src1; | ||
695 | let FlagOperandIdx = 3; | ||
696 | } | ||
697 | |||
698 | class DOT4_Pat <Instruction dot4> : Pat < | ||
699 | (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1), | ||
700 | (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) | ||
701 | >; | ||
702 | |||
703 | multiclass CUBE_Common <bits<11> inst> { | ||
704 | |||
705 | def _pseudo : InstR600 < | ||
706 | inst, | ||
707 | (outs R600_Reg128:$dst), | ||
708 | (ins R600_Reg128:$src), | ||
709 | "CUBE $dst $src", | ||
710 | [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], | ||
711 | VecALU | ||
712 | >; | ||
713 | |||
714 | def _real : InstR600 < | ||
715 | inst, | ||
716 | (outs R600_Reg32:$dst), | ||
717 | (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), | ||
718 | "CUBE $dst, $src0, $src1", | ||
719 | [], VecALU | ||
720 | >{ | ||
721 | let FlagOperandIdx = 3; | ||
722 | bits<7> dst; | ||
723 | bits<9> src0; | ||
724 | bits<9> src1; | ||
725 | let Inst{8-0} = src0; | ||
726 | let Inst{21-13} = src1; | ||
727 | let Inst{49-39} = inst; | ||
728 | let Inst{59-53} = dst; | ||
729 | } | ||
730 | } | ||
731 | |||
732 | class EXP_IEEE_Common <bits<11> inst> : R600_1OP < | ||
733 | inst, "EXP_IEEE", | ||
734 | [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] | ||
735 | >; | ||
736 | |||
737 | class FLT_TO_INT_Common <bits<11> inst> : R600_1OP < | ||
738 | inst, "FLT_TO_INT", | ||
739 | [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] | ||
740 | >; | ||
741 | |||
742 | class INT_TO_FLT_Common <bits<11> inst> : R600_1OP < | ||
743 | inst, "INT_TO_FLT", | ||
744 | [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] | ||
745 | >; | ||
746 | |||
747 | class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP < | ||
748 | inst, "FLT_TO_UINT", | ||
749 | [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] | ||
750 | >; | ||
751 | |||
752 | class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP < | ||
753 | inst, "UINT_TO_FLT", | ||
754 | [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] | ||
755 | >; | ||
756 | |||
757 | class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < | ||
758 | inst, "LOG_CLAMPED", | ||
759 | [] | ||
760 | >; | ||
761 | |||
762 | class LOG_IEEE_Common <bits<11> inst> : R600_1OP < | ||
763 | inst, "LOG_IEEE", | ||
764 | [(set R600_Reg32:$dst, (flog2 R600_Reg32:$src))] | ||
765 | >; | ||
766 | |||
767 | class LSHL_Common <bits<11> inst> : R600_2OP < | ||
768 | inst, "LSHL $dst, $src0, $src1", | ||
769 | [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] | ||
770 | >; | ||
771 | |||
772 | class LSHR_Common <bits<11> inst> : R600_2OP < | ||
773 | inst, "LSHR $dst, $src0, $src1", | ||
774 | [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] | ||
775 | >; | ||
776 | |||
777 | class ASHR_Common <bits<11> inst> : R600_2OP < | ||
778 | inst, "ASHR $dst, $src0, $src1", | ||
779 | [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] | ||
780 | >; | ||
781 | |||
782 | class MULHI_INT_Common <bits<11> inst> : R600_2OP < | ||
783 | inst, "MULHI_INT $dst, $src0, $src1", | ||
784 | [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] | ||
785 | >; | ||
786 | |||
787 | class MULHI_UINT_Common <bits<11> inst> : R600_2OP < | ||
788 | inst, "MULHI $dst, $src0, $src1", | ||
789 | [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] | ||
790 | >; | ||
791 | |||
792 | class MULLO_INT_Common <bits<11> inst> : R600_2OP < | ||
793 | inst, "MULLO_INT $dst, $src0, $src1", | ||
794 | [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))] | ||
795 | >; | ||
796 | |||
797 | class MULLO_UINT_Common <bits<11> inst> : R600_2OP < | ||
798 | inst, "MULLO_UINT $dst, $src0, $src1", | ||
799 | [] | ||
800 | >; | ||
801 | |||
802 | class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < | ||
803 | inst, "RECIP_CLAMPED", | ||
804 | [] | ||
805 | >; | ||
806 | |||
807 | class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < | ||
808 | inst, "RECIP_IEEE", | ||
809 | [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] | ||
810 | >; | ||
811 | |||
812 | class RECIP_UINT_Common <bits<11> inst> : R600_1OP < | ||
813 | inst, "RECIP_INT $dst, $src", | ||
814 | [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] | ||
815 | >; | ||
816 | |||
817 | class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP < | ||
818 | inst, "RECIPSQRT_CLAMPED", | ||
819 | [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] | ||
820 | >; | ||
821 | |||
822 | class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < | ||
823 | inst, "RECIPSQRT_IEEE", | ||
824 | [] | ||
825 | >; | ||
826 | |||
827 | class SIN_Common <bits<11> inst> : R600_1OP < | ||
828 | inst, "SIN", []>{ | ||
829 | let Trig = 1; | ||
830 | } | ||
831 | |||
832 | class COS_Common <bits<11> inst> : R600_1OP < | ||
833 | inst, "COS", []> { | ||
834 | let Trig = 1; | ||
835 | } | ||
836 | |||
837 | //===----------------------------------------------------------------------===// | ||
838 | // Helper patterns for complex intrinsics | ||
839 | //===----------------------------------------------------------------------===// | ||
840 | |||
841 | multiclass DIV_Common <InstR600 recip_ieee> { | ||
842 | def : Pat< | ||
843 | (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), | ||
844 | (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) | ||
845 | >; | ||
846 | |||
847 | def : Pat< | ||
848 | (fdiv R600_Reg32:$src0, R600_Reg32:$src1), | ||
849 | (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) | ||
850 | >; | ||
851 | } | ||
852 | |||
853 | class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat < | ||
854 | (int_AMDGPU_ssg R600_Reg32:$src), | ||
855 | (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE))) | ||
856 | >; | ||
857 | |||
858 | class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < | ||
859 | (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), | ||
860 | (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) | ||
861 | >; | ||
862 | |||
863 | //===----------------------------------------------------------------------===// | ||
864 | // R600 / R700 Instructions | ||
865 | //===----------------------------------------------------------------------===// | ||
866 | |||
867 | let Predicates = [isR600] in { | ||
868 | |||
869 | def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; | ||
870 | def MULADD_r600 : MULADD_Common<0x10>; | ||
871 | def CNDE_r600 : CNDE_Common<0x18>; | ||
872 | def CNDGT_r600 : CNDGT_Common<0x19>; | ||
873 | def CNDGE_r600 : CNDGE_Common<0x1A>; | ||
874 | def DOT4_r600 : DOT4_Common<0x50>; | ||
875 | def : DOT4_Pat <DOT4_r600>; | ||
876 | defm CUBE_r600 : CUBE_Common<0x52>; | ||
877 | def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; | ||
878 | def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; | ||
879 | def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; | ||
880 | def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; | ||
881 | def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; | ||
882 | def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; | ||
883 | def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; | ||
884 | def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; | ||
885 | def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; | ||
886 | def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; | ||
887 | def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; | ||
888 | def SIN_r600 : SIN_Common<0x6E>; | ||
889 | def COS_r600 : COS_Common<0x6F>; | ||
890 | def ASHR_r600 : ASHR_Common<0x70>; | ||
891 | def LSHR_r600 : LSHR_Common<0x71>; | ||
892 | def LSHL_r600 : LSHL_Common<0x72>; | ||
893 | def MULLO_INT_r600 : MULLO_INT_Common<0x73>; | ||
894 | def MULHI_INT_r600 : MULHI_INT_Common<0x74>; | ||
895 | def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; | ||
896 | def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; | ||
897 | def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; | ||
898 | |||
899 | defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; | ||
900 | def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>; | ||
901 | def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>; | ||
902 | def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; | ||
903 | |||
904 | } | ||
905 | |||
906 | // Helper pattern for normalizing inputs to triginomic instructions for R700+ | ||
907 | // cards. | ||
908 | class COS_PAT <InstR600 trig> : Pat< | ||
909 | (fcos R600_Reg32:$src), | ||
910 | (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) | ||
911 | >; | ||
912 | |||
913 | class SIN_PAT <InstR600 trig> : Pat< | ||
914 | (fsin R600_Reg32:$src), | ||
915 | (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) | ||
916 | >; | ||
917 | |||
918 | //===----------------------------------------------------------------------===// | ||
919 | // R700 Only instructions | ||
920 | //===----------------------------------------------------------------------===// | ||
921 | |||
922 | let Predicates = [isR700] in { | ||
923 | def SIN_r700 : SIN_Common<0x6E>; | ||
924 | def COS_r700 : COS_Common<0x6F>; | ||
925 | |||
926 | // R700 normalizes inputs to SIN/COS the same as EG | ||
927 | def : SIN_PAT <SIN_r700>; | ||
928 | def : COS_PAT <COS_r700>; | ||
929 | } | ||
930 | |||
931 | //===----------------------------------------------------------------------===// | ||
932 | // Evergreen Only instructions | ||
933 | //===----------------------------------------------------------------------===// | ||
934 | |||
935 | let Predicates = [isEG] in { | ||
936 | |||
937 | def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; | ||
938 | |||
939 | def MULLO_INT_eg : MULLO_INT_Common<0x8F>; | ||
940 | def MULHI_INT_eg : MULHI_INT_Common<0x90>; | ||
941 | def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; | ||
942 | def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; | ||
943 | def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; | ||
944 | |||
945 | } // End Predicates = [isEG] | ||
946 | |||
947 | //===----------------------------------------------------------------------===// | ||
948 | // Evergreen / Cayman Instructions | ||
949 | //===----------------------------------------------------------------------===// | ||
950 | |||
951 | let Predicates = [isEGorCayman] in { | ||
952 | |||
953 | // BFE_UINT - bit_extract, an optimization for mask and shift | ||
954 | // Src0 = Input | ||
955 | // Src1 = Offset | ||
956 | // Src2 = Width | ||
957 | // | ||
958 | // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) | ||
959 | // | ||
960 | // Example Usage: | ||
961 | // (Offset, Width) | ||
962 | // | ||
963 | // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 | ||
964 | // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 | ||
965 | // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 | ||
966 | // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 | ||
967 | def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", | ||
968 | [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, | ||
969 | R600_Reg32:$src1, | ||
970 | R600_Reg32:$src2))], | ||
971 | VecALU | ||
972 | >; | ||
973 | |||
974 | def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", | ||
975 | [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, | ||
976 | R600_Reg32:$src2))], | ||
977 | VecALU | ||
978 | >; | ||
979 | |||
980 | def MULADD_eg : MULADD_Common<0x14>; | ||
981 | def ASHR_eg : ASHR_Common<0x15>; | ||
982 | def LSHR_eg : LSHR_Common<0x16>; | ||
983 | def LSHL_eg : LSHL_Common<0x17>; | ||
984 | def CNDE_eg : CNDE_Common<0x19>; | ||
985 | def CNDGT_eg : CNDGT_Common<0x1A>; | ||
986 | def CNDGE_eg : CNDGE_Common<0x1B>; | ||
987 | def MUL_LIT_eg : MUL_LIT_Common<0x1F>; | ||
988 | def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; | ||
989 | def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; | ||
990 | def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; | ||
991 | def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; | ||
992 | def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; | ||
993 | def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; | ||
994 | def SIN_eg : SIN_Common<0x8D>; | ||
995 | def COS_eg : COS_Common<0x8E>; | ||
996 | def DOT4_eg : DOT4_Common<0xBE>; | ||
997 | def : DOT4_Pat <DOT4_eg>; | ||
998 | defm CUBE_eg : CUBE_Common<0xC0>; | ||
999 | |||
1000 | defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; | ||
1001 | def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; | ||
1002 | def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>; | ||
1003 | def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; | ||
1004 | |||
1005 | def : SIN_PAT <SIN_eg>; | ||
1006 | def : COS_PAT <COS_eg>; | ||
1007 | |||
1008 | def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { | ||
1009 | let Pattern = []; | ||
1010 | } | ||
1011 | |||
1012 | def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; | ||
1013 | |||
1014 | def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { | ||
1015 | let Pattern = []; | ||
1016 | } | ||
1017 | |||
1018 | def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; | ||
1019 | |||
1020 | def : Pat<(fp_to_sint R600_Reg32:$src), | ||
1021 | (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; | ||
1022 | |||
1023 | def : Pat<(fp_to_uint R600_Reg32:$src), | ||
1024 | (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; | ||
1025 | |||
1026 | def : Pat<(fsqrt R600_Reg32:$src), | ||
1027 | (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; | ||
1028 | |||
1029 | //===----------------------------------------------------------------------===// | ||
1030 | // Memory read/write instructions | ||
1031 | //===----------------------------------------------------------------------===// | ||
1032 | |||
1033 | let usesCustomInserter = 1 in { | ||
1034 | |||
1035 | class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name> : EG_CF_RAT < | ||
1036 | 0x57, 0x2, 0, (outs), ins, !strconcat(name, " $rw_gpr, $index_gpr, $eop"), []> | ||
1037 | { | ||
1038 | let RIM = 0; | ||
1039 | // XXX: Have a separate instruction for non-indexed writes. | ||
1040 | let TYPE = 1; | ||
1041 | let RW_REL = 0; | ||
1042 | let ELEM_SIZE = 0; | ||
1043 | |||
1044 | let ARRAY_SIZE = 0; | ||
1045 | let COMP_MASK = comp_mask; | ||
1046 | let BURST_COUNT = 0; | ||
1047 | let VPM = 0; | ||
1048 | let MARK = 0; | ||
1049 | let BARRIER = 1; | ||
1050 | } | ||
1051 | |||
1052 | } // End usesCustomInserter = 1 | ||
1053 | |||
1054 | // 32-bit store | ||
1055 | def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < | ||
1056 | (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), | ||
1057 | 0x1, "RAT_WRITE_CACHELESS_32_eg" | ||
1058 | >; | ||
1059 | |||
1060 | // i32 global_store | ||
1061 | def : Pat < | ||
1062 | (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), | ||
1063 | (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) | ||
1064 | >; | ||
1065 | |||
1066 | // Floating point global_store | ||
1067 | def : Pat < | ||
1068 | (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), | ||
1069 | (RAT_WRITE_CACHELESS_32_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) | ||
1070 | >; | ||
1071 | |||
1072 | //128-bit store | ||
1073 | def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < | ||
1074 | (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), | ||
1075 | 0xf, "RAT_WRITE_CACHELESS_128" | ||
1076 | >; | ||
1077 | |||
1078 | // v4f32 global store | ||
1079 | def : Pat < | ||
1080 | (global_store (v4f32 R600_Reg128:$val), R600_TReg32_X:$ptr), | ||
1081 | (RAT_WRITE_CACHELESS_128_eg R600_Reg128:$val, R600_TReg32_X:$ptr, 0) | ||
1082 | >; | ||
1083 | |||
1084 | class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern> | ||
1085 | : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> { | ||
1086 | |||
1087 | // Operands | ||
1088 | bits<7> DST_GPR; | ||
1089 | bits<7> SRC_GPR; | ||
1090 | |||
1091 | // Static fields | ||
1092 | bits<5> VC_INST = 0; | ||
1093 | bits<2> FETCH_TYPE = 2; | ||
1094 | bits<1> FETCH_WHOLE_QUAD = 0; | ||
1095 | bits<8> BUFFER_ID = buffer_id; | ||
1096 | bits<1> SRC_REL = 0; | ||
1097 | // XXX: We can infer this field based on the SRC_GPR. This would allow us | ||
1098 | // to store vertex addresses in any channel, not just X. | ||
1099 | bits<2> SRC_SEL_X = 0; | ||
1100 | bits<6> MEGA_FETCH_COUNT; | ||
1101 | bits<1> DST_REL = 0; | ||
1102 | bits<3> DST_SEL_X; | ||
1103 | bits<3> DST_SEL_Y; | ||
1104 | bits<3> DST_SEL_Z; | ||
1105 | bits<3> DST_SEL_W; | ||
1106 | // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, | ||
1107 | // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, | ||
1108 | // however, based on my testing if USE_CONST_FIELDS is set, then all | ||
1109 | // these fields need to be set to 0. | ||
1110 | bits<1> USE_CONST_FIELDS = 0; | ||
1111 | bits<6> DATA_FORMAT; | ||
1112 | bits<2> NUM_FORMAT_ALL = 1; | ||
1113 | bits<1> FORMAT_COMP_ALL = 0; | ||
1114 | bits<1> SRF_MODE_ALL = 0; | ||
1115 | |||
1116 | // LLVM can only encode 64-bit instructions, so these fields are manually | ||
1117 | // encoded in R600CodeEmitter | ||
1118 | // | ||
1119 | // bits<16> OFFSET; | ||
1120 | // bits<2> ENDIAN_SWAP = 0; | ||
1121 | // bits<1> CONST_BUF_NO_STRIDE = 0; | ||
1122 | // bits<1> MEGA_FETCH = 0; | ||
1123 | // bits<1> ALT_CONST = 0; | ||
1124 | // bits<2> BUFFER_INDEX_MODE = 0; | ||
1125 | |||
1126 | // VTX_WORD0 | ||
1127 | let Inst{4-0} = VC_INST; | ||
1128 | let Inst{6-5} = FETCH_TYPE; | ||
1129 | let Inst{7} = FETCH_WHOLE_QUAD; | ||
1130 | let Inst{15-8} = BUFFER_ID; | ||
1131 | let Inst{22-16} = SRC_GPR; | ||
1132 | let Inst{23} = SRC_REL; | ||
1133 | let Inst{25-24} = SRC_SEL_X; | ||
1134 | let Inst{31-26} = MEGA_FETCH_COUNT; | ||
1135 | |||
1136 | // VTX_WORD1_GPR | ||
1137 | let Inst{38-32} = DST_GPR; | ||
1138 | let Inst{39} = DST_REL; | ||
1139 | let Inst{40} = 0; // Reserved | ||
1140 | let Inst{43-41} = DST_SEL_X; | ||
1141 | let Inst{46-44} = DST_SEL_Y; | ||
1142 | let Inst{49-47} = DST_SEL_Z; | ||
1143 | let Inst{52-50} = DST_SEL_W; | ||
1144 | let Inst{53} = USE_CONST_FIELDS; | ||
1145 | let Inst{59-54} = DATA_FORMAT; | ||
1146 | let Inst{61-60} = NUM_FORMAT_ALL; | ||
1147 | let Inst{62} = FORMAT_COMP_ALL; | ||
1148 | let Inst{63} = SRF_MODE_ALL; | ||
1149 | |||
1150 | // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding | ||
1151 | // is done in R600CodeEmitter | ||
1152 | // | ||
1153 | // Inst{79-64} = OFFSET; | ||
1154 | // Inst{81-80} = ENDIAN_SWAP; | ||
1155 | // Inst{82} = CONST_BUF_NO_STRIDE; | ||
1156 | // Inst{83} = MEGA_FETCH; | ||
1157 | // Inst{84} = ALT_CONST; | ||
1158 | // Inst{86-85} = BUFFER_INDEX_MODE; | ||
1159 | // Inst{95-86} = 0; Reserved | ||
1160 | |||
1161 | // VTX_WORD3 (Padding) | ||
1162 | // | ||
1163 | // Inst{127-96} = 0; | ||
1164 | } | ||
1165 | |||
1166 | class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> | ||
1167 | : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { | ||
1168 | |||
1169 | let MEGA_FETCH_COUNT = 1; | ||
1170 | let DST_SEL_X = 0; | ||
1171 | let DST_SEL_Y = 7; // Masked | ||
1172 | let DST_SEL_Z = 7; // Masked | ||
1173 | let DST_SEL_W = 7; // Masked | ||
1174 | let DATA_FORMAT = 1; // FMT_8 | ||
1175 | } | ||
1176 | |||
1177 | class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> | ||
1178 | : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { | ||
1179 | |||
1180 | let MEGA_FETCH_COUNT = 4; | ||
1181 | let DST_SEL_X = 0; | ||
1182 | let DST_SEL_Y = 7; // Masked | ||
1183 | let DST_SEL_Z = 7; // Masked | ||
1184 | let DST_SEL_W = 7; // Masked | ||
1185 | let DATA_FORMAT = 0xD; // COLOR_32 | ||
1186 | |||
1187 | // This is not really necessary, but there were some GPU hangs that appeared | ||
1188 | // to be caused by ALU instructions in the next instruction group that wrote | ||
1189 | // to the $ptr registers of the VTX_READ. | ||
1190 | // e.g. | ||
1191 | // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 | ||
1192 | // %T2_X<def> = MOV %ZERO | ||
1193 | //Adding this constraint prevents this from happening. | ||
1194 | let Constraints = "$ptr.ptr = $dst"; | ||
1195 | } | ||
1196 | |||
1197 | class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> | ||
1198 | : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> { | ||
1199 | |||
1200 | let MEGA_FETCH_COUNT = 16; | ||
1201 | let DST_SEL_X = 0; | ||
1202 | let DST_SEL_Y = 1; | ||
1203 | let DST_SEL_Z = 2; | ||
1204 | let DST_SEL_W = 3; | ||
1205 | let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 | ||
1206 | |||
1207 | // XXX: Need to force VTX_READ_128 instructions to write to the same register | ||
1208 | // that holds its buffer address to avoid potential hangs. We can't use | ||
1209 | // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst | ||
1210 | // registers are different sizes. | ||
1211 | } | ||
1212 | |||
1213 | //===----------------------------------------------------------------------===// | ||
1214 | // VTX Read from parameter memory space | ||
1215 | //===----------------------------------------------------------------------===// | ||
1216 | |||
1217 | class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, | ||
1218 | [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] | ||
1219 | >; | ||
1220 | |||
1221 | def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; | ||
1222 | def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; | ||
1223 | |||
1224 | |||
1225 | //===----------------------------------------------------------------------===// | ||
1226 | // VTX Read from global memory space | ||
1227 | //===----------------------------------------------------------------------===// | ||
1228 | |||
1229 | // 8-bit reads | ||
1230 | def VTX_READ_GLOBAL_i8_eg : VTX_READ_8_eg <1, | ||
1231 | [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] | ||
1232 | >; | ||
1233 | |||
1234 | // 32-bit reads | ||
1235 | |||
1236 | class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, | ||
1237 | [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] | ||
1238 | >; | ||
1239 | |||
1240 | def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; | ||
1241 | def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>; | ||
1242 | |||
1243 | // 128-bit reads | ||
1244 | |||
1245 | class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1, | ||
1246 | [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] | ||
1247 | >; | ||
1248 | |||
1249 | def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>; | ||
1250 | def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>; | ||
1251 | |||
1252 | //===----------------------------------------------------------------------===// | ||
1253 | // Constant Loads | ||
1254 | // XXX: We are currently storing all constants in the global address space. | ||
1255 | //===----------------------------------------------------------------------===// | ||
1256 | |||
1257 | def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, | ||
1258 | [(set (f32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] | ||
1259 | >; | ||
1260 | |||
1261 | } | ||
1262 | |||
1263 | let Predicates = [isCayman] in { | ||
1264 | |||
1265 | let isVector = 1 in { | ||
1266 | |||
1267 | def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; | ||
1268 | |||
1269 | def MULLO_INT_cm : MULLO_INT_Common<0x8F>; | ||
1270 | def MULHI_INT_cm : MULHI_INT_Common<0x90>; | ||
1271 | def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; | ||
1272 | def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; | ||
1273 | |||
1274 | } // End isVector = 1 | ||
1275 | |||
1276 | // RECIP_UINT emulation for Cayman | ||
1277 | def : Pat < | ||
1278 | (AMDGPUurecip R600_Reg32:$src0), | ||
1279 | (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), | ||
1280 | (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000))) | ||
1281 | >; | ||
1282 | |||
1283 | } // End isCayman | ||
1284 | |||
1285 | let isCodeGenOnly = 1 in { | ||
1286 | |||
1287 | def MULLIT : AMDGPUShaderInst < | ||
1288 | (outs R600_Reg128:$dst), | ||
1289 | (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), | ||
1290 | "MULLIT $dst, $src0, $src1", | ||
1291 | [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] | ||
1292 | >; | ||
1293 | |||
1294 | let usesCustomInserter = 1, isPseudo = 1 in { | ||
1295 | |||
1296 | class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst < | ||
1297 | (outs R600_TReg32:$dst), | ||
1298 | (ins), | ||
1299 | asm, | ||
1300 | [(set R600_TReg32:$dst, (intr))] | ||
1301 | >; | ||
1302 | |||
1303 | def R600_LOAD_CONST : AMDGPUShaderInst < | ||
1304 | (outs R600_Reg32:$dst), | ||
1305 | (ins i32imm:$src0), | ||
1306 | "R600_LOAD_CONST $dst, $src0", | ||
1307 | [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] | ||
1308 | >; | ||
1309 | |||
1310 | def RESERVE_REG : AMDGPUShaderInst < | ||
1311 | (outs), | ||
1312 | (ins i32imm:$src), | ||
1313 | "RESERVE_REG $src", | ||
1314 | [(int_AMDGPU_reserve_reg imm:$src)] | ||
1315 | >; | ||
1316 | |||
1317 | def TXD: AMDGPUShaderInst < | ||
1318 | (outs R600_Reg128:$dst), | ||
1319 | (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), | ||
1320 | "TXD $dst, $src0, $src1, $src2, $src3, $src4", | ||
1321 | [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))] | ||
1322 | >; | ||
1323 | |||
1324 | def TXD_SHADOW: AMDGPUShaderInst < | ||
1325 | (outs R600_Reg128:$dst), | ||
1326 | (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), | ||
1327 | "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4", | ||
1328 | [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))] | ||
1329 | >; | ||
1330 | |||
1331 | } // End usesCustomInserter = 1, isPseudo = 1 | ||
1332 | |||
1333 | } // End isCodeGenOnly = 1 | ||
1334 | |||
1335 | def CLAMP_R600 : CLAMP <R600_Reg32>; | ||
1336 | def FABS_R600 : FABS<R600_Reg32>; | ||
1337 | def FNEG_R600 : FNEG<R600_Reg32>; | ||
1338 | |||
1339 | let usesCustomInserter = 1 in { | ||
1340 | |||
1341 | def MASK_WRITE : AMDGPUShaderInst < | ||
1342 | (outs), | ||
1343 | (ins R600_Reg32:$src), | ||
1344 | "MASK_WRITE $src", | ||
1345 | [] | ||
1346 | >; | ||
1347 | |||
1348 | } // End usesCustomInserter = 1 | ||
1349 | |||
1350 | //===---------------------------------------------------------------------===// | ||
1351 | // Return instruction | ||
1352 | //===---------------------------------------------------------------------===// | ||
1353 | let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { | ||
1354 | def RETURN : ILFormat<(outs), (ins variable_ops), | ||
1355 | "RETURN", [(IL_retflag)]>; | ||
1356 | } | ||
1357 | |||
1358 | //===----------------------------------------------------------------------===// | ||
1359 | // ISel Patterns | ||
1360 | //===----------------------------------------------------------------------===// | ||
1361 | |||
1362 | // KIL Patterns | ||
1363 | def KILP : Pat < | ||
1364 | (int_AMDGPU_kilp), | ||
1365 | (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0)) | ||
1366 | >; | ||
1367 | |||
1368 | def KIL : Pat < | ||
1369 | (int_AMDGPU_kill R600_Reg32:$src0), | ||
1370 | (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0)) | ||
1371 | >; | ||
1372 | |||
1373 | // SGT Reverse args | ||
1374 | def : Pat < | ||
1375 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), | ||
1376 | (SGT R600_Reg32:$src1, R600_Reg32:$src0) | ||
1377 | >; | ||
1378 | |||
1379 | // SGE Reverse args | ||
1380 | def : Pat < | ||
1381 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), | ||
1382 | (SGE R600_Reg32:$src1, R600_Reg32:$src0) | ||
1383 | >; | ||
1384 | |||
1385 | // SETGT_INT reverse args | ||
1386 | def : Pat < | ||
1387 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), | ||
1388 | (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) | ||
1389 | >; | ||
1390 | |||
1391 | // SETGE_INT reverse args | ||
1392 | def : Pat < | ||
1393 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), | ||
1394 | (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) | ||
1395 | >; | ||
1396 | |||
1397 | // SETGT_UINT reverse args | ||
1398 | def : Pat < | ||
1399 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), | ||
1400 | (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) | ||
1401 | >; | ||
1402 | |||
1403 | // SETGE_UINT reverse args | ||
1404 | def : Pat < | ||
1405 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), | ||
1406 | (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1) | ||
1407 | >; | ||
1408 | |||
1409 | // The next two patterns are special cases for handling 'true if ordered' and | ||
1410 | // 'true if unordered' conditionals. The assumption here is that the behavior of | ||
1411 | // SETE and SNE conforms to the Direct3D 10 rules for floating point values | ||
1412 | // described here: | ||
1413 | // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit | ||
1414 | // We assume that SETE returns false when one of the operands is NAN and | ||
1415 | // SNE returns true when on of the operands is NAN | ||
1416 | |||
1417 | //SETE - 'true if ordered' | ||
1418 | def : Pat < | ||
1419 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), | ||
1420 | (SETE R600_Reg32:$src0, R600_Reg32:$src1) | ||
1421 | >; | ||
1422 | |||
1423 | //SNE - 'true if unordered' | ||
1424 | def : Pat < | ||
1425 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), | ||
1426 | (SNE R600_Reg32:$src0, R600_Reg32:$src1) | ||
1427 | >; | ||
1428 | |||
1429 | def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; | ||
1430 | def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; | ||
1431 | def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; | ||
1432 | def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; | ||
1433 | |||
1434 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; | ||
1435 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; | ||
1436 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; | ||
1437 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; | ||
1438 | |||
1439 | def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; | ||
1440 | def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; | ||
1441 | def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; | ||
1442 | def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; | ||
1443 | |||
1444 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; | ||
1445 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; | ||
1446 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; | ||
1447 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; | ||
1448 | |||
1449 | def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; | ||
1450 | def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; | ||
1451 | |||
1452 | // bitconvert patterns | ||
1453 | |||
1454 | def : BitConvert <i32, f32, R600_Reg32>; | ||
1455 | def : BitConvert <f32, i32, R600_Reg32>; | ||
1456 | def : BitConvert <v4f32, v4i32, R600_Reg128>; | ||
1457 | |||
1458 | } // End isR600toCayman Predicate | ||
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td deleted file mode 100644 index 3b62f0a7303..00000000000 --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td +++ /dev/null | |||
@@ -1,50 +0,0 @@ | |||
1 | //===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600 Intrinsic Definitions | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | let TargetPrefix = "R600", isTarget = 1 in { | ||
15 | def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; | ||
16 | def int_R600_load_input_perspective : | ||
17 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
18 | def int_R600_load_input_constant : | ||
19 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
20 | def int_R600_load_input_linear : | ||
21 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
22 | def int_R600_load_input_position : | ||
23 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
24 | def int_R600_load_input_face : | ||
25 | Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
26 | } | ||
27 | |||
28 | let TargetPrefix = "r600", isTarget = 1 in { | ||
29 | |||
30 | class R600ReadPreloadRegisterIntrinsic<string name> | ||
31 | : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, | ||
32 | GCCBuiltin<name>; | ||
33 | |||
34 | multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> { | ||
35 | def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>; | ||
36 | def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>; | ||
37 | def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>; | ||
38 | } | ||
39 | |||
40 | defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < | ||
41 | "__builtin_r600_read_global_size">; | ||
42 | defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < | ||
43 | "__builtin_r600_read_local_size">; | ||
44 | defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < | ||
45 | "__builtin_r600_read_ngroups">; | ||
46 | defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < | ||
47 | "__builtin_r600_read_tgid">; | ||
48 | defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < | ||
49 | "__builtin_r600_read_tidig">; | ||
50 | } // End TargetPrefix = "r600" | ||
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td deleted file mode 100644 index 00877caf2b9..00000000000 --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | //===-- R600Intrinsics.td - TODO: Add brief description -------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // TODO: Add full description | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | let TargetPrefix = "R600", isTarget = 1 in { | ||
15 | def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; | ||
16 | def int_R600_load_input_perspective : | ||
17 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
18 | def int_R600_load_input_constant : | ||
19 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
20 | def int_R600_load_input_linear : | ||
21 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
22 | def int_R600_load_input_position : | ||
23 | Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
24 | def int_R600_load_input_face : | ||
25 | Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; | ||
26 | } | ||
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp deleted file mode 100644 index a31848efc99..00000000000 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | //===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | #include "R600MachineFunctionInfo.h" | ||
11 | |||
12 | using namespace llvm; | ||
13 | |||
14 | R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) | ||
15 | : MachineFunctionInfo(), | ||
16 | HasLinearInterpolation(false), | ||
17 | HasPerspectiveInterpolation(false) | ||
18 | { } | ||
19 | |||
20 | unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const | ||
21 | { | ||
22 | assert(HasPerspectiveInterpolation); | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | unsigned R600MachineFunctionInfo::GetIJLinearIndex() const | ||
27 | { | ||
28 | assert(HasLinearInterpolation); | ||
29 | if (HasPerspectiveInterpolation) | ||
30 | return 1; | ||
31 | else | ||
32 | return 0; | ||
33 | } | ||
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h deleted file mode 100644 index 68211b25813..00000000000 --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | //===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600MachineFunctionInfo is used for keeping track of which registers have | ||
11 | // been reserved by the llvm.AMDGPU.reserve.reg intrinsic. | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #ifndef R600MACHINEFUNCTIONINFO_H | ||
16 | #define R600MACHINEFUNCTIONINFO_H | ||
17 | |||
18 | #include "llvm/CodeGen/MachineFunction.h" | ||
19 | #include <vector> | ||
20 | |||
21 | namespace llvm { | ||
22 | |||
23 | class R600MachineFunctionInfo : public MachineFunctionInfo { | ||
24 | |||
25 | public: | ||
26 | R600MachineFunctionInfo(const MachineFunction &MF); | ||
27 | std::vector<unsigned> ReservedRegs; | ||
28 | bool HasLinearInterpolation; | ||
29 | bool HasPerspectiveInterpolation; | ||
30 | |||
31 | unsigned GetIJLinearIndex() const; | ||
32 | unsigned GetIJPerspectiveIndex() const; | ||
33 | |||
34 | }; | ||
35 | |||
36 | } // End llvm namespace | ||
37 | |||
38 | #endif //R600MACHINEFUNCTIONINFO_H | ||
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp deleted file mode 100644 index 4096cb050bf..00000000000 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ /dev/null | |||
@@ -1,128 +0,0 @@ | |||
1 | //===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // The file contains the R600 implementation of the TargetRegisterInfo class. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "R600RegisterInfo.h" | ||
15 | #include "AMDGPUTargetMachine.h" | ||
16 | #include "R600MachineFunctionInfo.h" | ||
17 | |||
18 | using namespace llvm; | ||
19 | |||
20 | R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, | ||
21 | const TargetInstrInfo &tii) | ||
22 | : AMDGPURegisterInfo(tm, tii), | ||
23 | TM(tm), | ||
24 | TII(tii) | ||
25 | { } | ||
26 | |||
27 | BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const | ||
28 | { | ||
29 | BitVector Reserved(getNumRegs()); | ||
30 | const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); | ||
31 | |||
32 | Reserved.set(AMDGPU::ZERO); | ||
33 | Reserved.set(AMDGPU::HALF); | ||
34 | Reserved.set(AMDGPU::ONE); | ||
35 | Reserved.set(AMDGPU::ONE_INT); | ||
36 | Reserved.set(AMDGPU::NEG_HALF); | ||
37 | Reserved.set(AMDGPU::NEG_ONE); | ||
38 | Reserved.set(AMDGPU::PV_X); | ||
39 | Reserved.set(AMDGPU::ALU_LITERAL_X); | ||
40 | Reserved.set(AMDGPU::PREDICATE_BIT); | ||
41 | Reserved.set(AMDGPU::PRED_SEL_OFF); | ||
42 | Reserved.set(AMDGPU::PRED_SEL_ZERO); | ||
43 | Reserved.set(AMDGPU::PRED_SEL_ONE); | ||
44 | |||
45 | for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), | ||
46 | E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { | ||
47 | Reserved.set(*I); | ||
48 | } | ||
49 | |||
50 | for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), | ||
51 | E = MFI->ReservedRegs.end(); I != E; ++I) { | ||
52 | Reserved.set(*I); | ||
53 | Reserved.set(*(getSuperRegisters(*I))); | ||
54 | } | ||
55 | |||
56 | return Reserved; | ||
57 | } | ||
58 | |||
59 | const TargetRegisterClass * | ||
60 | R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const | ||
61 | { | ||
62 | switch (rc->getID()) { | ||
63 | case AMDGPU::GPRF32RegClassID: | ||
64 | case AMDGPU::GPRI32RegClassID: | ||
65 | return &AMDGPU::R600_Reg32RegClass; | ||
66 | default: return rc; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const | ||
71 | { | ||
72 | switch(reg) { | ||
73 | case AMDGPU::ZERO: return 248; | ||
74 | case AMDGPU::ONE: | ||
75 | case AMDGPU::NEG_ONE: return 249; | ||
76 | case AMDGPU::ONE_INT: return 250; | ||
77 | case AMDGPU::HALF: | ||
78 | case AMDGPU::NEG_HALF: return 252; | ||
79 | case AMDGPU::ALU_LITERAL_X: return 253; | ||
80 | case AMDGPU::PREDICATE_BIT: | ||
81 | case AMDGPU::PRED_SEL_OFF: | ||
82 | case AMDGPU::PRED_SEL_ZERO: | ||
83 | case AMDGPU::PRED_SEL_ONE: | ||
84 | return 0; | ||
85 | default: return getHWRegIndexGen(reg); | ||
86 | } | ||
87 | } | ||
88 | |||
89 | unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const | ||
90 | { | ||
91 | switch(reg) { | ||
92 | case AMDGPU::ZERO: | ||
93 | case AMDGPU::ONE: | ||
94 | case AMDGPU::ONE_INT: | ||
95 | case AMDGPU::NEG_ONE: | ||
96 | case AMDGPU::HALF: | ||
97 | case AMDGPU::NEG_HALF: | ||
98 | case AMDGPU::ALU_LITERAL_X: | ||
99 | case AMDGPU::PREDICATE_BIT: | ||
100 | case AMDGPU::PRED_SEL_OFF: | ||
101 | case AMDGPU::PRED_SEL_ZERO: | ||
102 | case AMDGPU::PRED_SEL_ONE: | ||
103 | return 0; | ||
104 | default: return getHWRegChanGen(reg); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( | ||
109 | MVT VT) const | ||
110 | { | ||
111 | switch(VT.SimpleTy) { | ||
112 | default: | ||
113 | case MVT::i32: return &AMDGPU::R600_TReg32RegClass; | ||
114 | } | ||
115 | } | ||
116 | |||
117 | unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const | ||
118 | { | ||
119 | switch (Channel) { | ||
120 | default: assert(!"Invalid channel index"); return 0; | ||
121 | case 0: return AMDGPU::sel_x; | ||
122 | case 1: return AMDGPU::sel_y; | ||
123 | case 2: return AMDGPU::sel_z; | ||
124 | case 3: return AMDGPU::sel_w; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | #include "R600HwRegInfo.include" | ||
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h deleted file mode 100644 index 60f6d53b2d8..00000000000 --- a/src/gallium/drivers/radeon/R600RegisterInfo.h +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | //===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface definition for R600RegisterInfo | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef R600REGISTERINFO_H_ | ||
15 | #define R600REGISTERINFO_H_ | ||
16 | |||
17 | #include "AMDGPUTargetMachine.h" | ||
18 | #include "AMDGPURegisterInfo.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | |||
22 | class R600TargetMachine; | ||
23 | class TargetInstrInfo; | ||
24 | |||
25 | struct R600RegisterInfo : public AMDGPURegisterInfo | ||
26 | { | ||
27 | AMDGPUTargetMachine &TM; | ||
28 | const TargetInstrInfo &TII; | ||
29 | |||
30 | R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); | ||
31 | |||
32 | virtual BitVector getReservedRegs(const MachineFunction &MF) const; | ||
33 | |||
34 | /// getISARegClass - rc is an AMDIL reg class. This function returns the | ||
35 | /// R600 reg class that is equivalent to the given AMDIL reg class. | ||
36 | virtual const TargetRegisterClass * getISARegClass( | ||
37 | const TargetRegisterClass * rc) const; | ||
38 | |||
39 | /// getHWRegIndex - get the HW encoding for a register. | ||
40 | unsigned getHWRegIndex(unsigned reg) const; | ||
41 | |||
42 | /// getHWRegChan - get the HW encoding for a register's channel. | ||
43 | unsigned getHWRegChan(unsigned reg) const; | ||
44 | |||
45 | /// getCFGStructurizerRegClass - get the register class of the specified | ||
46 | /// type to use in the CFGStructurizer | ||
47 | virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; | ||
48 | |||
49 | /// getSubRegFromChannel - Return the sub reg enum value for the given | ||
50 | /// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x) | ||
51 | unsigned getSubRegFromChannel(unsigned Channel) const; | ||
52 | |||
53 | private: | ||
54 | /// getHWRegIndexGen - Generated function returns a register's encoding | ||
55 | unsigned getHWRegIndexGen(unsigned reg) const; | ||
56 | /// getHWRegChanGen - Generated function returns a register's channel | ||
57 | /// encoding. | ||
58 | unsigned getHWRegChanGen(unsigned reg) const; | ||
59 | }; | ||
60 | |||
61 | } // End namespace llvm | ||
62 | |||
63 | #endif // AMDIDSAREGISTERINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td deleted file mode 100644 index 7ede181c51d..00000000000 --- a/src/gallium/drivers/radeon/R600Schedule.td +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | //===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction | ||
11 | // slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS | ||
12 | // slot has been removed. | ||
13 | // | ||
14 | //===----------------------------------------------------------------------===// | ||
15 | |||
16 | |||
17 | def ALU_X : FuncUnit; | ||
18 | def ALU_Y : FuncUnit; | ||
19 | def ALU_Z : FuncUnit; | ||
20 | def ALU_W : FuncUnit; | ||
21 | def TRANS : FuncUnit; | ||
22 | |||
23 | def AnyALU : InstrItinClass; | ||
24 | def VecALU : InstrItinClass; | ||
25 | def TransALU : InstrItinClass; | ||
26 | |||
27 | def R600_EG_Itin : ProcessorItineraries < | ||
28 | [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], | ||
29 | [], | ||
30 | [ | ||
31 | InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>, | ||
32 | InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>, | ||
33 | InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>, | ||
34 | InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> | ||
35 | ] | ||
36 | >; | ||
diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp deleted file mode 100644 index 1fc0a873eb6..00000000000 --- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp +++ /dev/null | |||
@@ -1,151 +0,0 @@ | |||
1 | //===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This pass maps the pseudo interpolation registers to the correct physical | ||
11 | // registers. Prior to executing a fragment shader, the GPU loads interpolation | ||
12 | // parameters into physical registers. The specific physical register that each | ||
13 | // interpolation parameter ends up in depends on the type of the interpolation | ||
14 | // parameter as well as how many interpolation parameters are used by the | ||
15 | // shader. | ||
16 | // | ||
17 | //===----------------------------------------------------------------------===// | ||
18 | |||
19 | |||
20 | |||
21 | #include "AMDGPU.h" | ||
22 | #include "AMDIL.h" | ||
23 | #include "SIMachineFunctionInfo.h" | ||
24 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
27 | |||
28 | using namespace llvm; | ||
29 | |||
30 | namespace { | ||
31 | |||
32 | class SIAssignInterpRegsPass : public MachineFunctionPass { | ||
33 | |||
34 | private: | ||
35 | static char ID; | ||
36 | TargetMachine &TM; | ||
37 | |||
38 | void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, | ||
39 | unsigned physReg, unsigned virtReg); | ||
40 | |||
41 | public: | ||
42 | SIAssignInterpRegsPass(TargetMachine &tm) : | ||
43 | MachineFunctionPass(ID), TM(tm) { } | ||
44 | |||
45 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
46 | |||
47 | const char *getPassName() const { return "SI Assign intrpolation registers"; } | ||
48 | }; | ||
49 | |||
50 | } // End anonymous namespace | ||
51 | |||
52 | char SIAssignInterpRegsPass::ID = 0; | ||
53 | |||
54 | #define INTERP_VALUES 16 | ||
55 | #define REQUIRED_VALUE_MAX_INDEX 7 | ||
56 | |||
57 | struct InterpInfo { | ||
58 | bool Enabled; | ||
59 | unsigned Regs[3]; | ||
60 | unsigned RegCount; | ||
61 | }; | ||
62 | |||
63 | |||
64 | FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) { | ||
65 | return new SIAssignInterpRegsPass(tm); | ||
66 | } | ||
67 | |||
68 | bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) | ||
69 | { | ||
70 | |||
71 | struct InterpInfo InterpUse[INTERP_VALUES] = { | ||
72 | {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2}, | ||
73 | {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2}, | ||
74 | {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2}, | ||
75 | {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3}, | ||
76 | {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2}, | ||
77 | {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2}, | ||
78 | {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2}, | ||
79 | {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1}, | ||
80 | {false, {AMDGPU::POS_X_FLOAT}, 1}, | ||
81 | {false, {AMDGPU::POS_Y_FLOAT}, 1}, | ||
82 | {false, {AMDGPU::POS_Z_FLOAT}, 1}, | ||
83 | {false, {AMDGPU::POS_W_FLOAT}, 1}, | ||
84 | {false, {AMDGPU::FRONT_FACE}, 1}, | ||
85 | {false, {AMDGPU::ANCILLARY}, 1}, | ||
86 | {false, {AMDGPU::SAMPLE_COVERAGE}, 1}, | ||
87 | {false, {AMDGPU::POS_FIXED_PT}, 1} | ||
88 | }; | ||
89 | |||
90 | SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||
91 | // This pass is only needed for pixel shaders. | ||
92 | if (MFI->ShaderType != ShaderType::PIXEL) { | ||
93 | return false; | ||
94 | } | ||
95 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||
96 | bool ForceEnable = true; | ||
97 | |||
98 | // First pass, mark the interpolation values that are used. | ||
99 | for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { | ||
100 | for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; | ||
101 | RegIdx++) { | ||
102 | InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled || | ||
103 | !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]); | ||
104 | if (InterpUse[InterpIdx].Enabled && | ||
105 | InterpIdx <= REQUIRED_VALUE_MAX_INDEX) { | ||
106 | ForceEnable = false; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | |||
111 | // At least one interpolation mode must be enabled or else the GPU will hang. | ||
112 | if (ForceEnable) { | ||
113 | InterpUse[0].Enabled = true; | ||
114 | } | ||
115 | |||
116 | unsigned UsedVgprs = 0; | ||
117 | |||
118 | // Second pass, replace with VGPRs. | ||
119 | for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { | ||
120 | if (!InterpUse[InterpIdx].Enabled) { | ||
121 | continue; | ||
122 | } | ||
123 | MFI->SPIPSInputAddr |= (1 << InterpIdx); | ||
124 | |||
125 | for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; | ||
126 | RegIdx++, UsedVgprs++) { | ||
127 | unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs); | ||
128 | unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | ||
129 | MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg); | ||
130 | addLiveIn(&MF, MRI, NewReg, VirtReg); | ||
131 | } | ||
132 | } | ||
133 | |||
134 | return false; | ||
135 | } | ||
136 | |||
137 | void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF, | ||
138 | MachineRegisterInfo & MRI, | ||
139 | unsigned physReg, unsigned virtReg) | ||
140 | { | ||
141 | const TargetInstrInfo * TII = TM.getInstrInfo(); | ||
142 | if (!MRI.isLiveIn(physReg)) { | ||
143 | MRI.addLiveIn(physReg, virtReg); | ||
144 | MF->front().addLiveIn(physReg); | ||
145 | BuildMI(MF->front(), MF->front().begin(), DebugLoc(), | ||
146 | TII->get(TargetOpcode::COPY), virtReg) | ||
147 | .addReg(physReg); | ||
148 | } else { | ||
149 | MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); | ||
150 | } | ||
151 | } | ||
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl deleted file mode 100644 index 48bd5676eb0..00000000000 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ /dev/null | |||
@@ -1,291 +0,0 @@ | |||
1 | #===-- SIGenRegisterInfo.pl - Script for generating register info files ----===# | ||
2 | # | ||
3 | # The LLVM Compiler Infrastructure | ||
4 | # | ||
5 | # This file is distributed under the University of Illinois Open Source | ||
6 | # License. See LICENSE.TXT for details. | ||
7 | # | ||
8 | #===------------------------------------------------------------------------===# | ||
9 | # | ||
10 | # This perl script prints to stdout .td code to be used as SIRegisterInfo.td | ||
11 | # it also generates a file called SIHwRegInfo.include, which contains helper | ||
12 | # functions for determining the hw encoding of registers. | ||
13 | # | ||
14 | #===------------------------------------------------------------------------===# | ||
15 | |||
16 | use strict; | ||
17 | use warnings; | ||
18 | |||
19 | my $SGPR_COUNT = 104; | ||
20 | my $VGPR_COUNT = 256; | ||
21 | |||
22 | my $SGPR_MAX_IDX = $SGPR_COUNT - 1; | ||
23 | my $VGPR_MAX_IDX = $VGPR_COUNT - 1; | ||
24 | |||
25 | my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : ''; | ||
26 | |||
27 | print <<STRING; | ||
28 | |||
29 | let Namespace = "AMDGPU" in { | ||
30 | def low : SubRegIndex; | ||
31 | def high : SubRegIndex; | ||
32 | |||
33 | def sub0 : SubRegIndex; | ||
34 | def sub1 : SubRegIndex; | ||
35 | def sub2 : SubRegIndex; | ||
36 | def sub3 : SubRegIndex; | ||
37 | def sub4 : SubRegIndex; | ||
38 | def sub5 : SubRegIndex; | ||
39 | def sub6 : SubRegIndex; | ||
40 | def sub7 : SubRegIndex; | ||
41 | } | ||
42 | |||
43 | class SIReg <string n> : Register<n> { | ||
44 | let Namespace = "AMDGPU"; | ||
45 | } | ||
46 | |||
47 | class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { | ||
48 | let Namespace = "AMDGPU"; | ||
49 | let SubRegIndices = [low, high]; | ||
50 | } | ||
51 | |||
52 | class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { | ||
53 | let Namespace = "AMDGPU"; | ||
54 | let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; | ||
55 | } | ||
56 | |||
57 | class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { | ||
58 | let Namespace = "AMDGPU"; | ||
59 | let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7]; | ||
60 | } | ||
61 | |||
62 | class SGPR_32 <bits<8> num, string name> : SIReg<name> { | ||
63 | field bits<8> Num; | ||
64 | |||
65 | let Num = num; | ||
66 | } | ||
67 | |||
68 | |||
69 | class VGPR_32 <bits<9> num, string name> : SIReg<name> { | ||
70 | field bits<9> Num; | ||
71 | |||
72 | let Num = num; | ||
73 | } | ||
74 | |||
75 | class SGPR_64 <bits<8> num, string name, list<Register> subregs> : | ||
76 | SI_64 <name, subregs>; | ||
77 | |||
78 | class VGPR_64 <bits<9> num, string name, list<Register> subregs> : | ||
79 | SI_64 <name, subregs>; | ||
80 | |||
81 | class SGPR_128 <bits<8> num, string name, list<Register> subregs> : | ||
82 | SI_128 <name, subregs>; | ||
83 | |||
84 | class VGPR_128 <bits<9> num, string name, list<Register> subregs> : | ||
85 | SI_128 <name, subregs>; | ||
86 | |||
87 | class SGPR_256 <bits<8> num, string name, list<Register> subregs> : | ||
88 | SI_256 <name, subregs>; | ||
89 | |||
90 | def VCC : SIReg<"VCC">; | ||
91 | def EXEC_LO : SIReg<"EXEC LO">; | ||
92 | def EXEC_HI : SIReg<"EXEC HI">; | ||
93 | def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>; | ||
94 | def SCC : SIReg<"SCC">; | ||
95 | def SREG_LIT_0 : SIReg <"S LIT 0">; | ||
96 | def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">; | ||
97 | |||
98 | def M0 : SIReg <"M0">; | ||
99 | |||
100 | //Interpolation registers | ||
101 | |||
102 | def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; | ||
103 | def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; | ||
104 | def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; | ||
105 | def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">; | ||
106 | def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">; | ||
107 | def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">; | ||
108 | def PERSP_I_W : SIReg <"PERSP_I_W">; | ||
109 | def PERSP_J_W : SIReg <"PERSP_J_W">; | ||
110 | def PERSP_1_W : SIReg <"PERSP_1_W">; | ||
111 | def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">; | ||
112 | def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">; | ||
113 | def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">; | ||
114 | def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">; | ||
115 | def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">; | ||
116 | def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">; | ||
117 | def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">; | ||
118 | def POS_X_FLOAT : SIReg <"POS_X_FLOAT">; | ||
119 | def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">; | ||
120 | def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">; | ||
121 | def POS_W_FLOAT : SIReg <"POS_W_FLOAT">; | ||
122 | def FRONT_FACE : SIReg <"FRONT_FACE">; | ||
123 | def ANCILLARY : SIReg <"ANCILLARY">; | ||
124 | def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; | ||
125 | def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; | ||
126 | |||
127 | STRING | ||
128 | |||
129 | #32 bit register | ||
130 | |||
131 | my @SGPR; | ||
132 | for (my $i = 0; $i < $SGPR_COUNT; $i++) { | ||
133 | print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n"; | ||
134 | $SGPR[$i] = "SGPR$i"; | ||
135 | } | ||
136 | |||
137 | my @VGPR; | ||
138 | for (my $i = 0; $i < $VGPR_COUNT; $i++) { | ||
139 | print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n"; | ||
140 | $VGPR[$i] = "VGPR$i"; | ||
141 | } | ||
142 | |||
143 | print <<STRING; | ||
144 | |||
145 | def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, | ||
146 | (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI) | ||
147 | >; | ||
148 | |||
149 | def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, | ||
150 | (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX), | ||
151 | PERSP_SAMPLE_I, PERSP_SAMPLE_J, | ||
152 | PERSP_CENTER_I, PERSP_CENTER_J, | ||
153 | PERSP_CENTROID_I, PERSP_CENTROID_J, | ||
154 | PERSP_I_W, PERSP_J_W, PERSP_1_W, | ||
155 | LINEAR_SAMPLE_I, LINEAR_SAMPLE_J, | ||
156 | LINEAR_CENTER_I, LINEAR_CENTER_J, | ||
157 | LINEAR_CENTROID_I, LINEAR_CENTROID_J, | ||
158 | LINE_STIPPLE_TEX_COORD, | ||
159 | POS_X_FLOAT, | ||
160 | POS_Y_FLOAT, | ||
161 | POS_Z_FLOAT, | ||
162 | POS_W_FLOAT, | ||
163 | FRONT_FACE, | ||
164 | ANCILLARY, | ||
165 | SAMPLE_COVERAGE, | ||
166 | POS_FIXED_PT | ||
167 | ) | ||
168 | >; | ||
169 | |||
170 | def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, | ||
171 | (add VReg_32, SReg_32) | ||
172 | >; | ||
173 | |||
174 | def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; | ||
175 | def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; | ||
176 | def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; | ||
177 | def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; | ||
178 | |||
179 | |||
180 | STRING | ||
181 | |||
182 | my @subregs_64 = ('low', 'high'); | ||
183 | my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w'); | ||
184 | my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7'); | ||
185 | |||
186 | my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64')); | ||
187 | my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32')); | ||
188 | my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32')); | ||
189 | |||
190 | my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64')); | ||
191 | my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32')); | ||
192 | |||
193 | |||
194 | my $sgpr64_list = join(',', @SGPR64); | ||
195 | my $vgpr64_list = join(',', @VGPR64); | ||
196 | print <<STRING; | ||
197 | |||
198 | def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, | ||
199 | (add $sgpr64_list, $vgpr64_list) | ||
200 | >; | ||
201 | |||
202 | STRING | ||
203 | |||
204 | if ($INDEX_FILE ne '') { | ||
205 | open(my $fh, ">", $INDEX_FILE); | ||
206 | my %hw_values; | ||
207 | |||
208 | for (my $i = 0; $i <= $#SGPR; $i++) { | ||
209 | push (@{$hw_values{$i}}, $SGPR[$i]); | ||
210 | } | ||
211 | |||
212 | for (my $i = 0; $i <= $#SGPR64; $i++) { | ||
213 | push (@{$hw_values{$i * 2}}, $SGPR64[$i]) | ||
214 | } | ||
215 | |||
216 | for (my $i = 0; $i <= $#SGPR128; $i++) { | ||
217 | push (@{$hw_values{$i * 4}}, $SGPR128[$i]); | ||
218 | } | ||
219 | |||
220 | for (my $i = 0; $i <= $#SGPR256; $i++) { | ||
221 | push (@{$hw_values{$i * 8}}, $SGPR256[$i]); | ||
222 | } | ||
223 | |||
224 | for (my $i = 0; $i <= $#VGPR; $i++) { | ||
225 | push (@{$hw_values{$i}}, $VGPR[$i]); | ||
226 | } | ||
227 | for (my $i = 0; $i <= $#VGPR64; $i++) { | ||
228 | push (@{$hw_values{$i * 2}}, $VGPR64[$i]); | ||
229 | } | ||
230 | |||
231 | for (my $i = 0; $i <= $#VGPR128; $i++) { | ||
232 | push (@{$hw_values{$i * 4}}, $VGPR128[$i]); | ||
233 | } | ||
234 | |||
235 | |||
236 | print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n"; | ||
237 | for my $key (keys(%hw_values)) { | ||
238 | my @names = @{$hw_values{$key}}; | ||
239 | for my $regname (@names) { | ||
240 | print $fh " case AMDGPU::$regname:\n" | ||
241 | } | ||
242 | print $fh " return $key;\n"; | ||
243 | } | ||
244 | print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n" | ||
245 | } | ||
246 | |||
247 | |||
248 | |||
249 | |||
250 | sub print_sgpr_class { | ||
251 | my ($reg_width, $sub_reg_ref, @types) = @_; | ||
252 | return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types); | ||
253 | } | ||
254 | |||
255 | sub print_vgpr_class { | ||
256 | my ($reg_width, $sub_reg_ref, @types) = @_; | ||
257 | return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types); | ||
258 | } | ||
259 | |||
260 | sub print_reg_class { | ||
261 | my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_; | ||
262 | my @registers; | ||
263 | my $component_count = $reg_width / 32; | ||
264 | |||
265 | for (my $i = 0; $i < $reg_count; $i += $component_count) { | ||
266 | my $reg_name = $reg_prefix . $i . '_' . $reg_width; | ||
267 | my @sub_regs; | ||
268 | for (my $idx = 0; $idx < $component_count; $idx++) { | ||
269 | my $sub_idx = $i + $idx; | ||
270 | push(@sub_regs, $reg_prefix . $sub_idx); | ||
271 | } | ||
272 | print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n"; | ||
273 | push (@registers, $reg_name); | ||
274 | } | ||
275 | |||
276 | #Add VCC to SReg_64 | ||
277 | if ($class_prefix eq 'SReg' and $reg_width == 64) { | ||
278 | push (@registers, 'VCC') | ||
279 | } | ||
280 | |||
281 | #Add EXEC to SReg_64 | ||
282 | if ($class_prefix eq 'SReg' and $reg_width == 64) { | ||
283 | push (@registers, 'EXEC') | ||
284 | } | ||
285 | |||
286 | my $reg_list = join(', ', @registers); | ||
287 | |||
288 | print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n"; | ||
289 | print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n"; | ||
290 | return @registers; | ||
291 | } | ||
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp deleted file mode 100644 index 7c2739cf5b6..00000000000 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ /dev/null | |||
@@ -1,466 +0,0 @@ | |||
1 | //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is | ||
11 | // mostly EmitInstrWithCustomInserter(). | ||
12 | // | ||
13 | //===----------------------------------------------------------------------===// | ||
14 | |||
15 | #include "SIISelLowering.h" | ||
16 | #include "AMDIL.h" | ||
17 | #include "AMDILIntrinsicInfo.h" | ||
18 | #include "SIInstrInfo.h" | ||
19 | #include "SIMachineFunctionInfo.h" | ||
20 | #include "SIRegisterInfo.h" | ||
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
22 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
23 | #include "llvm/CodeGen/SelectionDAG.h" | ||
24 | |||
25 | using namespace llvm; | ||
26 | |||
27 | SITargetLowering::SITargetLowering(TargetMachine &TM) : | ||
28 | AMDGPUTargetLowering(TM), | ||
29 | TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) | ||
30 | { | ||
31 | addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); | ||
32 | addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); | ||
33 | addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); | ||
34 | addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass); | ||
35 | addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); | ||
36 | addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); | ||
37 | |||
38 | addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); | ||
39 | addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); | ||
40 | |||
41 | computeRegisterProperties(); | ||
42 | |||
43 | setOperationAction(ISD::AND, MVT::i1, Custom); | ||
44 | |||
45 | setOperationAction(ISD::ADD, MVT::i64, Legal); | ||
46 | setOperationAction(ISD::ADD, MVT::i32, Legal); | ||
47 | |||
48 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); | ||
49 | |||
50 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | ||
51 | |||
52 | // We need to custom lower loads from the USER_SGPR address space, so we can | ||
53 | // add the SGPRs as livein registers. | ||
54 | setOperationAction(ISD::LOAD, MVT::i32, Custom); | ||
55 | setOperationAction(ISD::LOAD, MVT::i64, Custom); | ||
56 | |||
57 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); | ||
58 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); | ||
59 | |||
60 | setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); | ||
61 | setTargetDAGCombine(ISD::SELECT_CC); | ||
62 | |||
63 | setTargetDAGCombine(ISD::SETCC); | ||
64 | } | ||
65 | |||
66 | MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( | ||
67 | MachineInstr * MI, MachineBasicBlock * BB) const | ||
68 | { | ||
69 | const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); | ||
70 | MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); | ||
71 | MachineBasicBlock::iterator I = MI; | ||
72 | |||
73 | if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { | ||
74 | AppendS_WAITCNT(MI, *BB, llvm::next(I)); | ||
75 | return BB; | ||
76 | } | ||
77 | |||
78 | switch (MI->getOpcode()) { | ||
79 | default: | ||
80 | return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); | ||
81 | case AMDGPU::BRANCH: return BB; | ||
82 | case AMDGPU::CLAMP_SI: | ||
83 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) | ||
84 | .addOperand(MI->getOperand(0)) | ||
85 | .addOperand(MI->getOperand(1)) | ||
86 | // VSRC1-2 are unused, but we still need to fill all the | ||
87 | // operand slots, so we just reuse the VSRC0 operand | ||
88 | .addOperand(MI->getOperand(1)) | ||
89 | .addOperand(MI->getOperand(1)) | ||
90 | .addImm(0) // ABS | ||
91 | .addImm(1) // CLAMP | ||
92 | .addImm(0) // OMOD | ||
93 | .addImm(0); // NEG | ||
94 | MI->eraseFromParent(); | ||
95 | break; | ||
96 | |||
97 | case AMDGPU::FABS_SI: | ||
98 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) | ||
99 | .addOperand(MI->getOperand(0)) | ||
100 | .addOperand(MI->getOperand(1)) | ||
101 | // VSRC1-2 are unused, but we still need to fill all the | ||
102 | // operand slots, so we just reuse the VSRC0 operand | ||
103 | .addOperand(MI->getOperand(1)) | ||
104 | .addOperand(MI->getOperand(1)) | ||
105 | .addImm(1) // ABS | ||
106 | .addImm(0) // CLAMP | ||
107 | .addImm(0) // OMOD | ||
108 | .addImm(0); // NEG | ||
109 | MI->eraseFromParent(); | ||
110 | break; | ||
111 | |||
112 | case AMDGPU::FNEG_SI: | ||
113 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) | ||
114 | .addOperand(MI->getOperand(0)) | ||
115 | .addOperand(MI->getOperand(1)) | ||
116 | // VSRC1-2 are unused, but we still need to fill all the | ||
117 | // operand slots, so we just reuse the VSRC0 operand | ||
118 | .addOperand(MI->getOperand(1)) | ||
119 | .addOperand(MI->getOperand(1)) | ||
120 | .addImm(0) // ABS | ||
121 | .addImm(0) // CLAMP | ||
122 | .addImm(0) // OMOD | ||
123 | .addImm(1); // NEG | ||
124 | MI->eraseFromParent(); | ||
125 | break; | ||
126 | case AMDGPU::SHADER_TYPE: | ||
127 | BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType = | ||
128 | MI->getOperand(0).getImm(); | ||
129 | MI->eraseFromParent(); | ||
130 | break; | ||
131 | |||
132 | case AMDGPU::SI_INTERP: | ||
133 | LowerSI_INTERP(MI, *BB, I, MRI); | ||
134 | break; | ||
135 | case AMDGPU::SI_INTERP_CONST: | ||
136 | LowerSI_INTERP_CONST(MI, *BB, I, MRI); | ||
137 | break; | ||
138 | case AMDGPU::SI_KIL: | ||
139 | LowerSI_KIL(MI, *BB, I, MRI); | ||
140 | break; | ||
141 | case AMDGPU::SI_WQM: | ||
142 | LowerSI_WQM(MI, *BB, I, MRI); | ||
143 | break; | ||
144 | case AMDGPU::SI_V_CNDLT: | ||
145 | LowerSI_V_CNDLT(MI, *BB, I, MRI); | ||
146 | break; | ||
147 | } | ||
148 | return BB; | ||
149 | } | ||
150 | |||
151 | void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, | ||
152 | MachineBasicBlock::iterator I) const | ||
153 | { | ||
154 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) | ||
155 | .addImm(0); | ||
156 | } | ||
157 | |||
158 | |||
159 | void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, | ||
160 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const | ||
161 | { | ||
162 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) | ||
163 | .addReg(AMDGPU::EXEC); | ||
164 | |||
165 | MI->eraseFromParent(); | ||
166 | } | ||
167 | |||
168 | void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, | ||
169 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const | ||
170 | { | ||
171 | unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); | ||
172 | unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); | ||
173 | MachineOperand dst = MI->getOperand(0); | ||
174 | MachineOperand iReg = MI->getOperand(1); | ||
175 | MachineOperand jReg = MI->getOperand(2); | ||
176 | MachineOperand attr_chan = MI->getOperand(3); | ||
177 | MachineOperand attr = MI->getOperand(4); | ||
178 | MachineOperand params = MI->getOperand(5); | ||
179 | |||
180 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) | ||
181 | .addOperand(params); | ||
182 | |||
183 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) | ||
184 | .addOperand(iReg) | ||
185 | .addOperand(attr_chan) | ||
186 | .addOperand(attr) | ||
187 | .addReg(M0); | ||
188 | |||
189 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) | ||
190 | .addOperand(dst) | ||
191 | .addReg(tmp) | ||
192 | .addOperand(jReg) | ||
193 | .addOperand(attr_chan) | ||
194 | .addOperand(attr) | ||
195 | .addReg(M0); | ||
196 | |||
197 | MI->eraseFromParent(); | ||
198 | } | ||
199 | |||
200 | void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, | ||
201 | MachineBasicBlock &BB, MachineBasicBlock::iterator I, | ||
202 | MachineRegisterInfo &MRI) const | ||
203 | { | ||
204 | MachineOperand dst = MI->getOperand(0); | ||
205 | MachineOperand attr_chan = MI->getOperand(1); | ||
206 | MachineOperand attr = MI->getOperand(2); | ||
207 | MachineOperand params = MI->getOperand(3); | ||
208 | unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); | ||
209 | |||
210 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) | ||
211 | .addOperand(params); | ||
212 | |||
213 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) | ||
214 | .addOperand(dst) | ||
215 | .addOperand(attr_chan) | ||
216 | .addOperand(attr) | ||
217 | .addReg(M0); | ||
218 | |||
219 | MI->eraseFromParent(); | ||
220 | } | ||
221 | |||
222 | void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, | ||
223 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const | ||
224 | { | ||
225 | // Clear this pixel from the exec mask if the operand is negative | ||
226 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), | ||
227 | AMDGPU::VCC) | ||
228 | .addReg(AMDGPU::SREG_LIT_0) | ||
229 | .addOperand(MI->getOperand(0)); | ||
230 | |||
231 | // If the exec mask is non-zero, skip the next two instructions | ||
232 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ)) | ||
233 | .addImm(3) | ||
234 | .addReg(AMDGPU::EXEC); | ||
235 | |||
236 | // Exec mask is zero: Export to NULL target... | ||
237 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP)) | ||
238 | .addImm(0) | ||
239 | .addImm(0x09) // V_008DFC_SQ_EXP_NULL | ||
240 | .addImm(0) | ||
241 | .addImm(1) | ||
242 | .addImm(1) | ||
243 | .addReg(AMDGPU::SREG_LIT_0) | ||
244 | .addReg(AMDGPU::SREG_LIT_0) | ||
245 | .addReg(AMDGPU::SREG_LIT_0) | ||
246 | .addReg(AMDGPU::SREG_LIT_0); | ||
247 | |||
248 | // ... and terminate wavefront | ||
249 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM)); | ||
250 | |||
251 | MI->eraseFromParent(); | ||
252 | } | ||
253 | |||
254 | void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, | ||
255 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const | ||
256 | { | ||
257 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32), | ||
258 | AMDGPU::VCC) | ||
259 | .addReg(AMDGPU::SREG_LIT_0) | ||
260 | .addOperand(MI->getOperand(1)); | ||
261 | |||
262 | BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32)) | ||
263 | .addOperand(MI->getOperand(0)) | ||
264 | .addOperand(MI->getOperand(3)) | ||
265 | .addOperand(MI->getOperand(2)) | ||
266 | .addReg(AMDGPU::VCC); | ||
267 | |||
268 | MI->eraseFromParent(); | ||
269 | } | ||
270 | |||
271 | EVT SITargetLowering::getSetCCResultType(EVT VT) const | ||
272 | { | ||
273 | return MVT::i1; | ||
274 | } | ||
275 | |||
276 | //===----------------------------------------------------------------------===// | ||
277 | // Custom DAG Lowering Operations | ||
278 | //===----------------------------------------------------------------------===// | ||
279 | |||
280 | SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const | ||
281 | { | ||
282 | switch (Op.getOpcode()) { | ||
283 | default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); | ||
284 | case ISD::BR_CC: return LowerBR_CC(Op, DAG); | ||
285 | case ISD::LOAD: return LowerLOAD(Op, DAG); | ||
286 | case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); | ||
287 | case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); | ||
288 | case ISD::INTRINSIC_WO_CHAIN: { | ||
289 | unsigned IntrinsicID = | ||
290 | cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | ||
291 | EVT VT = Op.getValueType(); | ||
292 | switch (IntrinsicID) { | ||
293 | case AMDGPUIntrinsic::SI_vs_load_buffer_index: | ||
294 | return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, | ||
295 | AMDGPU::VGPR0, VT); | ||
296 | default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); | ||
297 | } | ||
298 | break; | ||
299 | } | ||
300 | } | ||
301 | return SDValue(); | ||
302 | } | ||
303 | |||
304 | /// Loweri1ContextSwitch - The function is for lowering i1 operations on the | ||
305 | /// VCC register. In the VALU context, VCC is a one bit register, but in the | ||
306 | /// SALU context the VCC is a 64-bit register (1-bit per thread). Since only | ||
307 | /// the SALU can perform operations on the VCC register, we need to promote | ||
308 | /// the operand types from i1 to i64 in order for tablegen to be able to match | ||
309 | /// this operation to the correct SALU instruction. We do this promotion by | ||
310 | /// wrapping the operands in a CopyToReg node. | ||
311 | /// | ||
312 | SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, | ||
313 | SelectionDAG &DAG, | ||
314 | unsigned VCCNode) const | ||
315 | { | ||
316 | DebugLoc DL = Op.getDebugLoc(); | ||
317 | |||
318 | SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, | ||
319 | DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, | ||
320 | Op.getOperand(0)), | ||
321 | DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, | ||
322 | Op.getOperand(1))); | ||
323 | |||
324 | return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); | ||
325 | } | ||
326 | |||
327 | SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const | ||
328 | { | ||
329 | SDValue Chain = Op.getOperand(0); | ||
330 | SDValue CC = Op.getOperand(1); | ||
331 | SDValue LHS = Op.getOperand(2); | ||
332 | SDValue RHS = Op.getOperand(3); | ||
333 | SDValue JumpT = Op.getOperand(4); | ||
334 | SDValue CmpValue; | ||
335 | SDValue Result; | ||
336 | CmpValue = DAG.getNode( | ||
337 | ISD::SETCC, | ||
338 | Op.getDebugLoc(), | ||
339 | MVT::i1, | ||
340 | LHS, RHS, | ||
341 | CC); | ||
342 | |||
343 | Result = DAG.getNode( | ||
344 | AMDGPUISD::BRANCH_COND, | ||
345 | CmpValue.getDebugLoc(), | ||
346 | MVT::Other, Chain, | ||
347 | JumpT, CmpValue); | ||
348 | return Result; | ||
349 | } | ||
350 | |||
351 | SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const | ||
352 | { | ||
353 | EVT VT = Op.getValueType(); | ||
354 | LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op); | ||
355 | |||
356 | assert(Ptr); | ||
357 | |||
358 | unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace(); | ||
359 | |||
360 | // We only need to lower USER_SGPR address space loads | ||
361 | if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) { | ||
362 | return SDValue(); | ||
363 | } | ||
364 | |||
365 | // Loads from the USER_SGPR address space can only have constant value | ||
366 | // pointers. | ||
367 | ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr()); | ||
368 | assert(BasePtr); | ||
369 | |||
370 | unsigned TypeDwordWidth = VT.getSizeInBits() / 32; | ||
371 | const TargetRegisterClass * dstClass; | ||
372 | switch (TypeDwordWidth) { | ||
373 | default: | ||
374 | assert(!"USER_SGPR value size not implemented"); | ||
375 | return SDValue(); | ||
376 | case 1: | ||
377 | dstClass = &AMDGPU::SReg_32RegClass; | ||
378 | break; | ||
379 | case 2: | ||
380 | dstClass = &AMDGPU::SReg_64RegClass; | ||
381 | break; | ||
382 | } | ||
383 | uint64_t Index = BasePtr->getZExtValue(); | ||
384 | assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned"); | ||
385 | unsigned SGPRIndex = Index / TypeDwordWidth; | ||
386 | unsigned Reg = dstClass->getRegister(SGPRIndex); | ||
387 | |||
388 | DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg, | ||
389 | VT)); | ||
390 | return SDValue(); | ||
391 | } | ||
392 | |||
393 | SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const | ||
394 | { | ||
395 | SDValue LHS = Op.getOperand(0); | ||
396 | SDValue RHS = Op.getOperand(1); | ||
397 | SDValue True = Op.getOperand(2); | ||
398 | SDValue False = Op.getOperand(3); | ||
399 | SDValue CC = Op.getOperand(4); | ||
400 | EVT VT = Op.getValueType(); | ||
401 | DebugLoc DL = Op.getDebugLoc(); | ||
402 | |||
403 | SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); | ||
404 | return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); | ||
405 | } | ||
406 | |||
407 | //===----------------------------------------------------------------------===// | ||
408 | // Custom DAG optimizations | ||
409 | //===----------------------------------------------------------------------===// | ||
410 | |||
411 | SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | ||
412 | DAGCombinerInfo &DCI) const { | ||
413 | SelectionDAG &DAG = DCI.DAG; | ||
414 | DebugLoc DL = N->getDebugLoc(); | ||
415 | EVT VT = N->getValueType(0); | ||
416 | |||
417 | switch (N->getOpcode()) { | ||
418 | default: break; | ||
419 | case ISD::SELECT_CC: { | ||
420 | N->dump(); | ||
421 | ConstantSDNode *True, *False; | ||
422 | // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) | ||
423 | if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) | ||
424 | && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) | ||
425 | && True->isAllOnesValue() | ||
426 | && False->isNullValue() | ||
427 | && VT == MVT::i1) { | ||
428 | return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), | ||
429 | N->getOperand(1), N->getOperand(4)); | ||
430 | |||
431 | } | ||
432 | break; | ||
433 | } | ||
434 | case ISD::SETCC: { | ||
435 | SDValue Arg0 = N->getOperand(0); | ||
436 | SDValue Arg1 = N->getOperand(1); | ||
437 | SDValue CC = N->getOperand(2); | ||
438 | ConstantSDNode * C = NULL; | ||
439 | ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); | ||
440 | |||
441 | // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) | ||
442 | if (VT == MVT::i1 | ||
443 | && Arg0.getOpcode() == ISD::SIGN_EXTEND | ||
444 | && Arg0.getOperand(0).getValueType() == MVT::i1 | ||
445 | && (C = dyn_cast<ConstantSDNode>(Arg1)) | ||
446 | && C->isNullValue() | ||
447 | && CCOp == ISD::SETNE) { | ||
448 | return SimplifySetCC(VT, Arg0.getOperand(0), | ||
449 | DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); | ||
450 | } | ||
451 | break; | ||
452 | } | ||
453 | } | ||
454 | return SDValue(); | ||
455 | } | ||
456 | |||
457 | #define NODE_NAME_CASE(node) case SIISD::node: return #node; | ||
458 | |||
459 | const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const | ||
460 | { | ||
461 | switch (Opcode) { | ||
462 | default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); | ||
463 | NODE_NAME_CASE(VCC_AND) | ||
464 | NODE_NAME_CASE(VCC_BITCAST) | ||
465 | } | ||
466 | } | ||
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h deleted file mode 100644 index 4407bf04667..00000000000 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | //===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // SI DAG Lowering interface definition | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #ifndef SIISELLOWERING_H | ||
15 | #define SIISELLOWERING_H | ||
16 | |||
17 | #include "AMDGPUISelLowering.h" | ||
18 | #include "SIInstrInfo.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | |||
22 | class SITargetLowering : public AMDGPUTargetLowering | ||
23 | { | ||
24 | const SIInstrInfo * TII; | ||
25 | |||
26 | /// AppendS_WAITCNT - Memory reads and writes are syncronized using the | ||
27 | /// S_WAITCNT instruction. This function takes the most conservative | ||
28 | /// approach and inserts an S_WAITCNT instruction after every read and | ||
29 | /// write. | ||
30 | void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, | ||
31 | MachineBasicBlock::iterator I) const; | ||
32 | void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, | ||
33 | MachineBasicBlock::iterator I, unsigned Opocde) const; | ||
34 | void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, | ||
35 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; | ||
36 | void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, | ||
37 | MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; | ||
38 | void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, | ||
39 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; | ||
40 | void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, | ||
41 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; | ||
42 | void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, | ||
43 | MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; | ||
44 | |||
45 | SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, | ||
46 | unsigned VCCNode) const; | ||
47 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; | ||
48 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; | ||
49 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; | ||
50 | |||
51 | public: | ||
52 | SITargetLowering(TargetMachine &tm); | ||
53 | virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, | ||
54 | MachineBasicBlock * BB) const; | ||
55 | virtual EVT getSetCCResultType(EVT VT) const; | ||
56 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; | ||
57 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; | ||
58 | virtual const char* getTargetNodeName(unsigned Opcode) const; | ||
59 | }; | ||
60 | |||
61 | } // End namespace llvm | ||
62 | |||
63 | #endif //SIISELLOWERING_H | ||
diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td deleted file mode 100644 index 8f56e21f5a6..00000000000 --- a/src/gallium/drivers/radeon/SIInstrFormats.td +++ /dev/null | |||
@@ -1,131 +0,0 @@ | |||
1 | //===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // SI Instruction format definitions. | ||
11 | // | ||
12 | // Instructions with _32 take 32-bit operands. | ||
13 | // Instructions with _64 take 64-bit operands. | ||
14 | // | ||
15 | // VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit | ||
16 | // encoding is the standard encoding, but instruction that make use of | ||
17 | // any of the instruction modifiers must use the 64-bit encoding. | ||
18 | // | ||
19 | // Instructions with _e32 use the 32-bit encoding. | ||
20 | // Instructions with _e64 use the 64-bit encoding. | ||
21 | // | ||
22 | //===----------------------------------------------------------------------===// | ||
23 | |||
24 | |||
25 | class VOP3_32 <bits<9> op, string opName, list<dag> pattern> | ||
26 | : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; | ||
27 | |||
28 | class VOP3_64 <bits<9> op, string opName, list<dag> pattern> | ||
29 | : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; | ||
30 | |||
31 | |||
32 | class SOP1_32 <bits<8> op, string opName, list<dag> pattern> | ||
33 | : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>; | ||
34 | |||
35 | class SOP1_64 <bits<8> op, string opName, list<dag> pattern> | ||
36 | : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>; | ||
37 | |||
38 | class SOP2_32 <bits<7> op, string opName, list<dag> pattern> | ||
39 | : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; | ||
40 | |||
41 | class SOP2_64 <bits<7> op, string opName, list<dag> pattern> | ||
42 | : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; | ||
43 | |||
44 | class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> | ||
45 | : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; | ||
46 | |||
47 | class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, | ||
48 | string opName, list<dag> pattern> : | ||
49 | VOP1 < | ||
50 | op, (outs vrc:$dst), (ins arc:$src0), opName, pattern | ||
51 | >; | ||
52 | |||
53 | multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> { | ||
54 | def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>; | ||
55 | def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
56 | opName, [] | ||
57 | >; | ||
58 | } | ||
59 | |||
60 | multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> { | ||
61 | |||
62 | def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>; | ||
63 | |||
64 | def _e64 : VOP3_64 < | ||
65 | {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
66 | opName, [] | ||
67 | >; | ||
68 | } | ||
69 | |||
70 | class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, | ||
71 | string opName, list<dag> pattern> : | ||
72 | VOP2 < | ||
73 | op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern | ||
74 | >; | ||
75 | |||
76 | multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> { | ||
77 | |||
78 | def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>; | ||
79 | |||
80 | def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
81 | opName, [] | ||
82 | >; | ||
83 | } | ||
84 | |||
85 | multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> { | ||
86 | def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>; | ||
87 | |||
88 | def _e64 : VOP3_64 < | ||
89 | {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
90 | opName, [] | ||
91 | >; | ||
92 | } | ||
93 | |||
94 | class SOPK_32 <bits<5> op, string opName, list<dag> pattern> | ||
95 | : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>; | ||
96 | |||
97 | class SOPK_64 <bits<5> op, string opName, list<dag> pattern> | ||
98 | : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>; | ||
99 | |||
100 | class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, | ||
101 | string opName, list<dag> pattern> : | ||
102 | VOPC < | ||
103 | op, (ins arc:$src0, vrc:$src1), opName, pattern | ||
104 | >; | ||
105 | |||
106 | multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> { | ||
107 | |||
108 | def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>; | ||
109 | |||
110 | def _e64 : VOP3_32 < | ||
111 | {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
112 | opName, [] | ||
113 | >; | ||
114 | } | ||
115 | |||
116 | multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> { | ||
117 | |||
118 | def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>; | ||
119 | |||
120 | def _e64 : VOP3_64 < | ||
121 | {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, | ||
122 | opName, [] | ||
123 | >; | ||
124 | } | ||
125 | |||
126 | class SOPC_32 <bits<7> op, string opName, list<dag> pattern> | ||
127 | : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; | ||
128 | |||
129 | class SOPC_64 <bits<7> op, string opName, list<dag> pattern> | ||
130 | : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; | ||
131 | |||
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp deleted file mode 100644 index 42fef6ba468..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ /dev/null | |||
@@ -1,76 +0,0 @@ | |||
1 | //===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // SI Implementation of TargetInstrInfo. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | #include "SIInstrInfo.h" | ||
16 | #include "AMDGPUTargetMachine.h" | ||
17 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
18 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
19 | #include "llvm/MC/MCInstrDesc.h" | ||
20 | |||
21 | #include <stdio.h> | ||
22 | |||
23 | using namespace llvm; | ||
24 | |||
25 | SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) | ||
26 | : AMDGPUInstrInfo(tm), | ||
27 | RI(tm, *this), | ||
28 | TM(tm) | ||
29 | { } | ||
30 | |||
31 | const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const | ||
32 | { | ||
33 | return RI; | ||
34 | } | ||
35 | |||
36 | void | ||
37 | SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, | ||
38 | MachineBasicBlock::iterator MI, DebugLoc DL, | ||
39 | unsigned DestReg, unsigned SrcReg, | ||
40 | bool KillSrc) const | ||
41 | { | ||
42 | |||
43 | // If we are trying to copy to or from SCC, there is a bug somewhere else in | ||
44 | // the backend. While it may be theoretically possible to do this, it should | ||
45 | // never be necessary. | ||
46 | assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); | ||
47 | |||
48 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) | ||
49 | .addReg(SrcReg, getKillRegState(KillSrc)); | ||
50 | } | ||
51 | |||
52 | MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, | ||
53 | int64_t Imm) const | ||
54 | { | ||
55 | MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); | ||
56 | MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); | ||
57 | MachineInstrBuilder(MI).addImm(Imm); | ||
58 | |||
59 | return MI; | ||
60 | |||
61 | } | ||
62 | |||
63 | bool SIInstrInfo::isMov(unsigned Opcode) const | ||
64 | { | ||
65 | switch(Opcode) { | ||
66 | default: return false; | ||
67 | case AMDGPU::S_MOV_B32: | ||
68 | case AMDGPU::S_MOV_B64: | ||
69 | case AMDGPU::V_MOV_B32_e32: | ||
70 | case AMDGPU::V_MOV_B32_e64: | ||
71 | case AMDGPU::V_MOV_IMM_F32: | ||
72 | case AMDGPU::V_MOV_IMM_I32: | ||
73 | case AMDGPU::S_MOV_IMM_I32: | ||
74 | return true; | ||
75 | } | ||
76 | } | ||
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h deleted file mode 100644 index 5915a00f779..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | //===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface definition for SIInstrInfo. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | #ifndef SIINSTRINFO_H | ||
16 | #define SIINSTRINFO_H | ||
17 | |||
18 | #include "AMDGPUInstrInfo.h" | ||
19 | #include "SIRegisterInfo.h" | ||
20 | |||
21 | namespace llvm { | ||
22 | |||
23 | class SIInstrInfo : public AMDGPUInstrInfo { | ||
24 | private: | ||
25 | const SIRegisterInfo RI; | ||
26 | AMDGPUTargetMachine &TM; | ||
27 | |||
28 | public: | ||
29 | explicit SIInstrInfo(AMDGPUTargetMachine &tm); | ||
30 | |||
31 | const SIRegisterInfo &getRegisterInfo() const; | ||
32 | |||
33 | virtual void copyPhysReg(MachineBasicBlock &MBB, | ||
34 | MachineBasicBlock::iterator MI, DebugLoc DL, | ||
35 | unsigned DestReg, unsigned SrcReg, | ||
36 | bool KillSrc) const; | ||
37 | |||
38 | /// getEncodingType - Returns the encoding type of this instruction. | ||
39 | unsigned getEncodingType(const MachineInstr &MI) const; | ||
40 | |||
41 | /// getEncodingBytes - Returns the size of this instructions encoding in | ||
42 | /// number of bytes. | ||
43 | unsigned getEncodingBytes(const MachineInstr &MI) const; | ||
44 | |||
45 | virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, | ||
46 | int64_t Imm) const; | ||
47 | |||
48 | virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} | ||
49 | virtual bool isMov(unsigned Opcode) const; | ||
50 | |||
51 | }; | ||
52 | |||
53 | } // End namespace llvm | ||
54 | |||
55 | namespace SIInstrFlags { | ||
56 | enum Flags { | ||
57 | // First 4 bits are the instruction encoding | ||
58 | NEED_WAIT = 1 << 4 | ||
59 | }; | ||
60 | } | ||
61 | |||
62 | #endif //SIINSTRINFO_H | ||
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td deleted file mode 100644 index 49ef342a154..00000000000 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ /dev/null | |||
@@ -1,506 +0,0 @@ | |||
1 | //===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | //===----------------------------------------------------------------------===// | ||
11 | // SI DAG Profiles | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ | ||
14 | SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> | ||
15 | ]>; | ||
16 | |||
17 | //===----------------------------------------------------------------------===// | ||
18 | // SI DAG Nodes | ||
19 | //===----------------------------------------------------------------------===// | ||
20 | |||
21 | // and operation on 64-bit wide vcc | ||
22 | def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, | ||
23 | [SDNPCommutative, SDNPAssociative] | ||
24 | >; | ||
25 | |||
26 | // Special bitcast node for sharing VCC register between VALU and SALU | ||
27 | def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", | ||
28 | SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> | ||
29 | >; | ||
30 | |||
31 | class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : | ||
32 | AMDGPUInst<outs, ins, asm, pattern> { | ||
33 | |||
34 | field bits<4> EncodingType = 0; | ||
35 | field bits<1> NeedWait = 0; | ||
36 | |||
37 | let TSFlags{3-0} = EncodingType; | ||
38 | let TSFlags{4} = NeedWait; | ||
39 | |||
40 | } | ||
41 | |||
42 | class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : | ||
43 | InstSI <outs, ins, asm, pattern> { | ||
44 | |||
45 | field bits<32> Inst; | ||
46 | } | ||
47 | |||
48 | class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : | ||
49 | InstSI <outs, ins, asm, pattern> { | ||
50 | |||
51 | field bits<64> Inst; | ||
52 | } | ||
53 | |||
54 | class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { | ||
55 | let EncoderMethod = "encodeOperand"; | ||
56 | let MIOperandInfo = opInfo; | ||
57 | } | ||
58 | |||
59 | def IMM16bit : ImmLeaf < | ||
60 | i16, | ||
61 | [{return isInt<16>(Imm);}] | ||
62 | >; | ||
63 | |||
64 | def IMM8bit : ImmLeaf < | ||
65 | i32, | ||
66 | [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] | ||
67 | >; | ||
68 | |||
69 | def IMM12bit : ImmLeaf < | ||
70 | i16, | ||
71 | [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] | ||
72 | >; | ||
73 | |||
74 | def IMM32bitIn64bit : ImmLeaf < | ||
75 | i64, | ||
76 | [{return isInt<32>(Imm);}] | ||
77 | >; | ||
78 | |||
79 | class GPR4Align <RegisterClass rc> : Operand <vAny> { | ||
80 | let EncoderMethod = "GPR4AlignEncode"; | ||
81 | let MIOperandInfo = (ops rc:$reg); | ||
82 | } | ||
83 | |||
84 | class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> { | ||
85 | let EncoderMethod = "GPR2AlignEncode"; | ||
86 | let MIOperandInfo = (ops rc:$reg); | ||
87 | } | ||
88 | |||
89 | def SMRDmemrr : Operand<iPTR> { | ||
90 | let MIOperandInfo = (ops SReg_64, SReg_32); | ||
91 | let EncoderMethod = "GPR2AlignEncode"; | ||
92 | } | ||
93 | |||
94 | def SMRDmemri : Operand<iPTR> { | ||
95 | let MIOperandInfo = (ops SReg_64, i32imm); | ||
96 | let EncoderMethod = "SMRDmemriEncode"; | ||
97 | } | ||
98 | |||
99 | def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>; | ||
100 | def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>; | ||
101 | |||
102 | let Uses = [EXEC] in { | ||
103 | def EXP : Enc64< | ||
104 | (outs), | ||
105 | (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, | ||
106 | VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), | ||
107 | "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", | ||
108 | [] > { | ||
109 | |||
110 | bits<4> EN; | ||
111 | bits<6> TGT; | ||
112 | bits<1> COMPR; | ||
113 | bits<1> DONE; | ||
114 | bits<1> VM; | ||
115 | bits<8> VSRC0; | ||
116 | bits<8> VSRC1; | ||
117 | bits<8> VSRC2; | ||
118 | bits<8> VSRC3; | ||
119 | |||
120 | let Inst{3-0} = EN; | ||
121 | let Inst{9-4} = TGT; | ||
122 | let Inst{10} = COMPR; | ||
123 | let Inst{11} = DONE; | ||
124 | let Inst{12} = VM; | ||
125 | let Inst{31-26} = 0x3e; | ||
126 | let Inst{39-32} = VSRC0; | ||
127 | let Inst{47-40} = VSRC1; | ||
128 | let Inst{55-48} = VSRC2; | ||
129 | let Inst{63-56} = VSRC3; | ||
130 | let EncodingType = 0; //SIInstrEncodingType::EXP | ||
131 | |||
132 | let NeedWait = 1; | ||
133 | let usesCustomInserter = 1; | ||
134 | } | ||
135 | |||
136 | class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
137 | Enc64 <outs, ins, asm, pattern> { | ||
138 | |||
139 | bits<8> VDATA; | ||
140 | bits<4> DMASK; | ||
141 | bits<1> UNORM; | ||
142 | bits<1> GLC; | ||
143 | bits<1> DA; | ||
144 | bits<1> R128; | ||
145 | bits<1> TFE; | ||
146 | bits<1> LWE; | ||
147 | bits<1> SLC; | ||
148 | bits<8> VADDR; | ||
149 | bits<5> SRSRC; | ||
150 | bits<5> SSAMP; | ||
151 | |||
152 | let Inst{11-8} = DMASK; | ||
153 | let Inst{12} = UNORM; | ||
154 | let Inst{13} = GLC; | ||
155 | let Inst{14} = DA; | ||
156 | let Inst{15} = R128; | ||
157 | let Inst{16} = TFE; | ||
158 | let Inst{17} = LWE; | ||
159 | let Inst{24-18} = op; | ||
160 | let Inst{25} = SLC; | ||
161 | let Inst{31-26} = 0x3c; | ||
162 | let Inst{39-32} = VADDR; | ||
163 | let Inst{47-40} = VDATA; | ||
164 | let Inst{52-48} = SRSRC; | ||
165 | let Inst{57-53} = SSAMP; | ||
166 | |||
167 | let EncodingType = 2; //SIInstrEncodingType::MIMG | ||
168 | |||
169 | let NeedWait = 1; | ||
170 | let usesCustomInserter = 1; | ||
171 | } | ||
172 | |||
173 | class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
174 | Enc64<outs, ins, asm, pattern> { | ||
175 | |||
176 | bits<8> VDATA; | ||
177 | bits<12> OFFSET; | ||
178 | bits<1> OFFEN; | ||
179 | bits<1> IDXEN; | ||
180 | bits<1> GLC; | ||
181 | bits<1> ADDR64; | ||
182 | bits<4> DFMT; | ||
183 | bits<3> NFMT; | ||
184 | bits<8> VADDR; | ||
185 | bits<5> SRSRC; | ||
186 | bits<1> SLC; | ||
187 | bits<1> TFE; | ||
188 | bits<8> SOFFSET; | ||
189 | |||
190 | let Inst{11-0} = OFFSET; | ||
191 | let Inst{12} = OFFEN; | ||
192 | let Inst{13} = IDXEN; | ||
193 | let Inst{14} = GLC; | ||
194 | let Inst{15} = ADDR64; | ||
195 | let Inst{18-16} = op; | ||
196 | let Inst{22-19} = DFMT; | ||
197 | let Inst{25-23} = NFMT; | ||
198 | let Inst{31-26} = 0x3a; //encoding | ||
199 | let Inst{39-32} = VADDR; | ||
200 | let Inst{47-40} = VDATA; | ||
201 | let Inst{52-48} = SRSRC; | ||
202 | let Inst{54} = SLC; | ||
203 | let Inst{55} = TFE; | ||
204 | let Inst{63-56} = SOFFSET; | ||
205 | let EncodingType = 3; //SIInstrEncodingType::MTBUF | ||
206 | |||
207 | let NeedWait = 1; | ||
208 | let usesCustomInserter = 1; | ||
209 | let neverHasSideEffects = 1; | ||
210 | } | ||
211 | |||
212 | class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
213 | Enc64<outs, ins, asm, pattern> { | ||
214 | |||
215 | bits<8> VDATA; | ||
216 | bits<12> OFFSET; | ||
217 | bits<1> OFFEN; | ||
218 | bits<1> IDXEN; | ||
219 | bits<1> GLC; | ||
220 | bits<1> ADDR64; | ||
221 | bits<1> LDS; | ||
222 | bits<8> VADDR; | ||
223 | bits<5> SRSRC; | ||
224 | bits<1> SLC; | ||
225 | bits<1> TFE; | ||
226 | bits<8> SOFFSET; | ||
227 | |||
228 | let Inst{11-0} = OFFSET; | ||
229 | let Inst{12} = OFFEN; | ||
230 | let Inst{13} = IDXEN; | ||
231 | let Inst{14} = GLC; | ||
232 | let Inst{15} = ADDR64; | ||
233 | let Inst{16} = LDS; | ||
234 | let Inst{24-18} = op; | ||
235 | let Inst{31-26} = 0x38; //encoding | ||
236 | let Inst{39-32} = VADDR; | ||
237 | let Inst{47-40} = VDATA; | ||
238 | let Inst{52-48} = SRSRC; | ||
239 | let Inst{54} = SLC; | ||
240 | let Inst{55} = TFE; | ||
241 | let Inst{63-56} = SOFFSET; | ||
242 | let EncodingType = 4; //SIInstrEncodingType::MUBUF | ||
243 | |||
244 | let NeedWait = 1; | ||
245 | let usesCustomInserter = 1; | ||
246 | let neverHasSideEffects = 1; | ||
247 | } | ||
248 | } // End Uses = [EXEC] | ||
249 | |||
250 | class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
251 | Enc32<outs, ins, asm, pattern> { | ||
252 | |||
253 | bits<7> SDST; | ||
254 | bits<15> PTR; | ||
255 | bits<8> OFFSET = PTR{7-0}; | ||
256 | bits<1> IMM = PTR{8}; | ||
257 | bits<6> SBASE = PTR{14-9}; | ||
258 | |||
259 | let Inst{7-0} = OFFSET; | ||
260 | let Inst{8} = IMM; | ||
261 | let Inst{14-9} = SBASE; | ||
262 | let Inst{21-15} = SDST; | ||
263 | let Inst{26-22} = op; | ||
264 | let Inst{31-27} = 0x18; //encoding | ||
265 | let EncodingType = 5; //SIInstrEncodingType::SMRD | ||
266 | |||
267 | let NeedWait = 1; | ||
268 | let usesCustomInserter = 1; | ||
269 | } | ||
270 | |||
271 | class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
272 | Enc32<outs, ins, asm, pattern> { | ||
273 | |||
274 | bits<7> SDST; | ||
275 | bits<8> SSRC0; | ||
276 | |||
277 | let Inst{7-0} = SSRC0; | ||
278 | let Inst{15-8} = op; | ||
279 | let Inst{22-16} = SDST; | ||
280 | let Inst{31-23} = 0x17d; //encoding; | ||
281 | let EncodingType = 6; //SIInstrEncodingType::SOP1 | ||
282 | } | ||
283 | |||
284 | class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
285 | Enc32 <outs, ins, asm, pattern> { | ||
286 | |||
287 | bits<7> SDST; | ||
288 | bits<8> SSRC0; | ||
289 | bits<8> SSRC1; | ||
290 | |||
291 | let Inst{7-0} = SSRC0; | ||
292 | let Inst{15-8} = SSRC1; | ||
293 | let Inst{22-16} = SDST; | ||
294 | let Inst{29-23} = op; | ||
295 | let Inst{31-30} = 0x2; // encoding | ||
296 | let EncodingType = 7; // SIInstrEncodingType::SOP2 | ||
297 | } | ||
298 | |||
299 | class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
300 | Enc32<outs, ins, asm, pattern> { | ||
301 | |||
302 | bits<8> SSRC0; | ||
303 | bits<8> SSRC1; | ||
304 | |||
305 | let Inst{7-0} = SSRC0; | ||
306 | let Inst{15-8} = SSRC1; | ||
307 | let Inst{22-16} = op; | ||
308 | let Inst{31-23} = 0x17e; | ||
309 | let EncodingType = 8; // SIInstrEncodingType::SOPC | ||
310 | |||
311 | let DisableEncoding = "$dst"; | ||
312 | } | ||
313 | |||
314 | class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
315 | Enc32 <outs, ins , asm, pattern> { | ||
316 | |||
317 | bits <7> SDST; | ||
318 | bits <16> SIMM16; | ||
319 | |||
320 | let Inst{15-0} = SIMM16; | ||
321 | let Inst{22-16} = SDST; | ||
322 | let Inst{27-23} = op; | ||
323 | let Inst{31-28} = 0xb; //encoding | ||
324 | let EncodingType = 9; // SIInstrEncodingType::SOPK | ||
325 | } | ||
326 | |||
327 | class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < | ||
328 | (outs), | ||
329 | ins, | ||
330 | asm, | ||
331 | pattern > { | ||
332 | |||
333 | bits <16> SIMM16; | ||
334 | |||
335 | let Inst{15-0} = SIMM16; | ||
336 | let Inst{22-16} = op; | ||
337 | let Inst{31-23} = 0x17f; // encoding | ||
338 | let EncodingType = 10; // SIInstrEncodingType::SOPP | ||
339 | } | ||
340 | |||
341 | |||
342 | let Uses = [EXEC] in { | ||
343 | class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
344 | Enc32 <outs, ins, asm, pattern> { | ||
345 | |||
346 | bits<8> VDST; | ||
347 | bits<8> VSRC; | ||
348 | bits<2> ATTRCHAN; | ||
349 | bits<6> ATTR; | ||
350 | |||
351 | let Inst{7-0} = VSRC; | ||
352 | let Inst{9-8} = ATTRCHAN; | ||
353 | let Inst{15-10} = ATTR; | ||
354 | let Inst{17-16} = op; | ||
355 | let Inst{25-18} = VDST; | ||
356 | let Inst{31-26} = 0x32; // encoding | ||
357 | let EncodingType = 11; // SIInstrEncodingType::VINTRP | ||
358 | |||
359 | let neverHasSideEffects = 1; | ||
360 | } | ||
361 | |||
362 | class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
363 | Enc32 <outs, ins, asm, pattern> { | ||
364 | |||
365 | bits<8> VDST; | ||
366 | bits<9> SRC0; | ||
367 | |||
368 | let Inst{8-0} = SRC0; | ||
369 | let Inst{16-9} = op; | ||
370 | let Inst{24-17} = VDST; | ||
371 | let Inst{31-25} = 0x3f; //encoding | ||
372 | |||
373 | let EncodingType = 12; // SIInstrEncodingType::VOP1 | ||
374 | let PostEncoderMethod = "VOPPostEncode"; | ||
375 | } | ||
376 | |||
377 | class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
378 | Enc32 <outs, ins, asm, pattern> { | ||
379 | |||
380 | bits<8> VDST; | ||
381 | bits<9> SRC0; | ||
382 | bits<8> VSRC1; | ||
383 | |||
384 | let Inst{8-0} = SRC0; | ||
385 | let Inst{16-9} = VSRC1; | ||
386 | let Inst{24-17} = VDST; | ||
387 | let Inst{30-25} = op; | ||
388 | let Inst{31} = 0x0; //encoding | ||
389 | |||
390 | let EncodingType = 13; // SIInstrEncodingType::VOP2 | ||
391 | let PostEncoderMethod = "VOPPostEncode"; | ||
392 | } | ||
393 | |||
394 | class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : | ||
395 | Enc64 <outs, ins, asm, pattern> { | ||
396 | |||
397 | bits<8> VDST; | ||
398 | bits<9> SRC0; | ||
399 | bits<9> SRC1; | ||
400 | bits<9> SRC2; | ||
401 | bits<3> ABS; | ||
402 | bits<1> CLAMP; | ||
403 | bits<2> OMOD; | ||
404 | bits<3> NEG; | ||
405 | |||
406 | let Inst{7-0} = VDST; | ||
407 | let Inst{10-8} = ABS; | ||
408 | let Inst{11} = CLAMP; | ||
409 | let Inst{25-17} = op; | ||
410 | let Inst{31-26} = 0x34; //encoding | ||
411 | let Inst{40-32} = SRC0; | ||
412 | let Inst{49-41} = SRC1; | ||
413 | let Inst{58-50} = SRC2; | ||
414 | let Inst{60-59} = OMOD; | ||
415 | let Inst{63-61} = NEG; | ||
416 | |||
417 | let EncodingType = 14; // SIInstrEncodingType::VOP3 | ||
418 | let PostEncoderMethod = "VOPPostEncode"; | ||
419 | } | ||
420 | |||
421 | class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : | ||
422 | Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { | ||
423 | |||
424 | bits<9> SRC0; | ||
425 | bits<8> VSRC1; | ||
426 | |||
427 | let Inst{8-0} = SRC0; | ||
428 | let Inst{16-9} = VSRC1; | ||
429 | let Inst{24-17} = op; | ||
430 | let Inst{31-25} = 0x3e; | ||
431 | |||
432 | let EncodingType = 15; //SIInstrEncodingType::VOPC | ||
433 | let PostEncoderMethod = "VOPPostEncode"; | ||
434 | let DisableEncoding = "$dst"; | ||
435 | } | ||
436 | } // End Uses = [EXEC] | ||
437 | |||
438 | class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < | ||
439 | op, | ||
440 | (outs VReg_128:$vdata), | ||
441 | (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, | ||
442 | i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, | ||
443 | GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), | ||
444 | asm, | ||
445 | [] | ||
446 | >; | ||
447 | |||
448 | class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < | ||
449 | op, | ||
450 | (outs regClass:$dst), | ||
451 | (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, | ||
452 | i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc, | ||
453 | i1imm:$tfe, SReg_32:$soffset), | ||
454 | asm, | ||
455 | []> { | ||
456 | let mayLoad = 1; | ||
457 | } | ||
458 | |||
459 | class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < | ||
460 | op, | ||
461 | (outs regClass:$dst), | ||
462 | (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, | ||
463 | i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, | ||
464 | i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), | ||
465 | asm, | ||
466 | []> { | ||
467 | let mayLoad = 1; | ||
468 | } | ||
469 | |||
470 | class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < | ||
471 | op, | ||
472 | (outs), | ||
473 | (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, | ||
474 | i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, | ||
475 | GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), | ||
476 | asm, | ||
477 | []> { | ||
478 | let mayStore = 1; | ||
479 | } | ||
480 | |||
481 | multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass, | ||
482 | ValueType vt> { | ||
483 | def _IMM : SMRD < | ||
484 | op, | ||
485 | (outs dstClass:$dst), | ||
486 | (ins SMRDmemri:$src0), | ||
487 | asm, | ||
488 | [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] | ||
489 | >; | ||
490 | |||
491 | def _SGPR : SMRD < | ||
492 | op, | ||
493 | (outs dstClass:$dst), | ||
494 | (ins SMRDmemrr:$src0), | ||
495 | asm, | ||
496 | [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] | ||
497 | >; | ||
498 | } | ||
499 | |||
500 | multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> { | ||
501 | defm _F32 : SMRD_Helper <op, asm, dstClass, f32>; | ||
502 | defm _I32 : SMRD_Helper <op, asm, dstClass, i32>; | ||
503 | } | ||
504 | |||
505 | include "SIInstrFormats.td" | ||
506 | include "SIInstructions.td" | ||
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td deleted file mode 100644 index f9bdc63e3e5..00000000000 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ /dev/null | |||
@@ -1,1180 +0,0 @@ | |||
1 | //===-- SIInstructions.td - SI Instruction Defintions ---------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | def isSI : Predicate<"Subtarget.device()" | ||
11 | "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; | ||
12 | |||
13 | let Predicates = [isSI] in { | ||
14 | |||
15 | let neverHasSideEffects = 1 in { | ||
16 | def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; | ||
17 | def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; | ||
18 | def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; | ||
19 | def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; | ||
20 | def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; | ||
21 | def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; | ||
22 | def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; | ||
23 | def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; | ||
24 | def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; | ||
25 | def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; | ||
26 | } // End neverHasSideEffects = 1 | ||
27 | ////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; | ||
28 | ////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; | ||
29 | ////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; | ||
30 | ////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; | ||
31 | ////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; | ||
32 | ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; | ||
33 | ////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; | ||
34 | ////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; | ||
35 | //def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; | ||
36 | //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; | ||
37 | def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; | ||
38 | //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; | ||
39 | //def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; | ||
40 | //def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; | ||
41 | ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; | ||
42 | ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; | ||
43 | ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; | ||
44 | ////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>; | ||
45 | def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>; | ||
46 | def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>; | ||
47 | def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>; | ||
48 | def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>; | ||
49 | def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>; | ||
50 | def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>; | ||
51 | def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>; | ||
52 | ////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>; | ||
53 | ////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>; | ||
54 | def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>; | ||
55 | def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>; | ||
56 | def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>; | ||
57 | def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>; | ||
58 | def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>; | ||
59 | def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>; | ||
60 | def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>; | ||
61 | def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>; | ||
62 | def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>; | ||
63 | //def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>; | ||
64 | def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; | ||
65 | def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; | ||
66 | def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; | ||
67 | def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; | ||
68 | def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; | ||
69 | |||
70 | /* | ||
71 | This instruction is disabled for now until we can figure out how to teach | ||
72 | the instruction selector to correctly use the S_CMP* vs V_CMP* | ||
73 | instructions. | ||
74 | |||
75 | When this instruction is enabled the code generator sometimes produces this | ||
76 | invalid sequence: | ||
77 | |||
78 | SCC = S_CMPK_EQ_I32 SGPR0, imm | ||
79 | VCC = COPY SCC | ||
80 | VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 | ||
81 | |||
82 | def S_CMPK_EQ_I32 : SOPK < | ||
83 | 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), | ||
84 | "S_CMPK_EQ_I32", | ||
85 | [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] | ||
86 | >; | ||
87 | */ | ||
88 | |||
89 | def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; | ||
90 | def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; | ||
91 | def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; | ||
92 | def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; | ||
93 | def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; | ||
94 | def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; | ||
95 | def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; | ||
96 | def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; | ||
97 | def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; | ||
98 | def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; | ||
99 | def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; | ||
100 | def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; | ||
101 | def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; | ||
102 | //def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; | ||
103 | def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>; | ||
104 | def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>; | ||
105 | def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; | ||
106 | //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; | ||
107 | //def EXP : EXP_ <0x00000000, "EXP", []>; | ||
108 | |||
109 | defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; | ||
110 | defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", | ||
111 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT))] | ||
112 | >; | ||
113 | defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", | ||
114 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ))] | ||
115 | >; | ||
116 | defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", | ||
117 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE))] | ||
118 | >; | ||
119 | defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", | ||
120 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT))] | ||
121 | >; | ||
122 | defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", | ||
123 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))] | ||
124 | >; | ||
125 | defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", | ||
126 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE))] | ||
127 | >; | ||
128 | defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; | ||
129 | defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; | ||
130 | defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; | ||
131 | defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; | ||
132 | defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; | ||
133 | defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; | ||
134 | defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", | ||
135 | [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))] | ||
136 | >; | ||
137 | defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; | ||
138 | defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; | ||
139 | defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; | ||
140 | defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; | ||
141 | defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; | ||
142 | defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; | ||
143 | defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; | ||
144 | defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; | ||
145 | defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; | ||
146 | defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; | ||
147 | defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; | ||
148 | defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; | ||
149 | defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; | ||
150 | defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; | ||
151 | defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; | ||
152 | defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; | ||
153 | defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; | ||
154 | defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; | ||
155 | defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; | ||
156 | defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; | ||
157 | defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; | ||
158 | defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; | ||
159 | defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; | ||
160 | defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; | ||
161 | defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; | ||
162 | defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>; | ||
163 | defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; | ||
164 | defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; | ||
165 | defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; | ||
166 | defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; | ||
167 | defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; | ||
168 | defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; | ||
169 | defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; | ||
170 | defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; | ||
171 | defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; | ||
172 | defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; | ||
173 | defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; | ||
174 | defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; | ||
175 | defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; | ||
176 | defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; | ||
177 | defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; | ||
178 | defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; | ||
179 | defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; | ||
180 | defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; | ||
181 | defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>; | ||
182 | defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; | ||
183 | defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; | ||
184 | defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; | ||
185 | defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; | ||
186 | defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; | ||
187 | defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; | ||
188 | defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; | ||
189 | defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; | ||
190 | defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; | ||
191 | defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; | ||
192 | defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; | ||
193 | defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; | ||
194 | defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; | ||
195 | defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; | ||
196 | defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; | ||
197 | defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; | ||
198 | defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; | ||
199 | defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; | ||
200 | defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; | ||
201 | defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; | ||
202 | defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; | ||
203 | defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; | ||
204 | defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; | ||
205 | defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; | ||
206 | defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; | ||
207 | defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; | ||
208 | defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; | ||
209 | defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; | ||
210 | defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; | ||
211 | defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; | ||
212 | defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; | ||
213 | defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; | ||
214 | defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; | ||
215 | defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; | ||
216 | defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; | ||
217 | defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; | ||
218 | defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; | ||
219 | defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>; | ||
220 | defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; | ||
221 | defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; | ||
222 | defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; | ||
223 | defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; | ||
224 | defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; | ||
225 | defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; | ||
226 | defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; | ||
227 | defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; | ||
228 | defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; | ||
229 | defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; | ||
230 | defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; | ||
231 | defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; | ||
232 | defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; | ||
233 | defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; | ||
234 | defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; | ||
235 | defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; | ||
236 | defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; | ||
237 | defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; | ||
238 | defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; | ||
239 | defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; | ||
240 | defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; | ||
241 | defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; | ||
242 | defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; | ||
243 | defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; | ||
244 | defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>; | ||
245 | defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; | ||
246 | defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; | ||
247 | defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; | ||
248 | defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; | ||
249 | defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; | ||
250 | defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; | ||
251 | defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; | ||
252 | defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", | ||
253 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLT))] | ||
254 | >; | ||
255 | defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", | ||
256 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETEQ))] | ||
257 | >; | ||
258 | defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", | ||
259 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETLE))] | ||
260 | >; | ||
261 | defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", | ||
262 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGT))] | ||
263 | >; | ||
264 | defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", | ||
265 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETNE))] | ||
266 | >; | ||
267 | defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", | ||
268 | [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETGE))] | ||
269 | >; | ||
270 | defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; | ||
271 | defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; | ||
272 | defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; | ||
273 | defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; | ||
274 | defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; | ||
275 | defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; | ||
276 | defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; | ||
277 | defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; | ||
278 | defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; | ||
279 | defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; | ||
280 | defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; | ||
281 | defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; | ||
282 | defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; | ||
283 | defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; | ||
284 | defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>; | ||
285 | defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>; | ||
286 | defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; | ||
287 | defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; | ||
288 | defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; | ||
289 | defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; | ||
290 | defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; | ||
291 | defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; | ||
292 | defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; | ||
293 | defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; | ||
294 | defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; | ||
295 | defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; | ||
296 | defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; | ||
297 | defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>; | ||
298 | defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; | ||
299 | defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; | ||
300 | defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; | ||
301 | defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; | ||
302 | defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; | ||
303 | defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; | ||
304 | defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; | ||
305 | defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; | ||
306 | defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; | ||
307 | defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; | ||
308 | defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; | ||
309 | defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; | ||
310 | defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; | ||
311 | defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; | ||
312 | defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; | ||
313 | defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; | ||
314 | defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; | ||
315 | defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; | ||
316 | defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; | ||
317 | defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; | ||
318 | defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; | ||
319 | defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; | ||
320 | defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; | ||
321 | defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; | ||
322 | defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; | ||
323 | defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; | ||
324 | defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; | ||
325 | defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; | ||
326 | defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; | ||
327 | defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; | ||
328 | defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; | ||
329 | defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>; | ||
330 | defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; | ||
331 | //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; | ||
332 | //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; | ||
333 | //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; | ||
334 | def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; | ||
335 | //def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>; | ||
336 | //def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; | ||
337 | //def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; | ||
338 | //def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; | ||
339 | //def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>; | ||
340 | //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; | ||
341 | //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; | ||
342 | //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; | ||
343 | //def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; | ||
344 | //def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; | ||
345 | //def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; | ||
346 | //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; | ||
347 | //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; | ||
348 | //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; | ||
349 | //def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; | ||
350 | //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; | ||
351 | //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; | ||
352 | //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; | ||
353 | //def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>; | ||
354 | //def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>; | ||
355 | //def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>; | ||
356 | //def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>; | ||
357 | //def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>; | ||
358 | //def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>; | ||
359 | //def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>; | ||
360 | //def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>; | ||
361 | //def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>; | ||
362 | //def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>; | ||
363 | //def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>; | ||
364 | //def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>; | ||
365 | //def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>; | ||
366 | //def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>; | ||
367 | //def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>; | ||
368 | //def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>; | ||
369 | //def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>; | ||
370 | //def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>; | ||
371 | //def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>; | ||
372 | //def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>; | ||
373 | //def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>; | ||
374 | //def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>; | ||
375 | //def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>; | ||
376 | //def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>; | ||
377 | //def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>; | ||
378 | //def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>; | ||
379 | //def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>; | ||
380 | //def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>; | ||
381 | //def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>; | ||
382 | //def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>; | ||
383 | //def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>; | ||
384 | //def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>; | ||
385 | //def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>; | ||
386 | //def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>; | ||
387 | //def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>; | ||
388 | //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; | ||
389 | //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; | ||
390 | def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; | ||
391 | //def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; | ||
392 | //def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; | ||
393 | //def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; | ||
394 | //def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; | ||
395 | |||
396 | defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; | ||
397 | |||
398 | //def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; | ||
399 | defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; | ||
400 | defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; | ||
401 | //def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; | ||
402 | //def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; | ||
403 | //def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; | ||
404 | //def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; | ||
405 | //def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; | ||
406 | //def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; | ||
407 | |||
408 | //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; | ||
409 | //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; | ||
410 | //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; | ||
411 | //def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; | ||
412 | //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; | ||
413 | //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; | ||
414 | //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; | ||
415 | //def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>; | ||
416 | //def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>; | ||
417 | //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; | ||
418 | //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; | ||
419 | //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; | ||
420 | //def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; | ||
421 | //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; | ||
422 | //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; | ||
423 | //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; | ||
424 | //def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>; | ||
425 | //def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>; | ||
426 | //def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>; | ||
427 | //def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>; | ||
428 | //def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>; | ||
429 | //def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>; | ||
430 | //def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>; | ||
431 | //def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>; | ||
432 | //def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>; | ||
433 | //def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>; | ||
434 | //def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>; | ||
435 | //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; | ||
436 | //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; | ||
437 | //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; | ||
438 | def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; | ||
439 | //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; | ||
440 | //def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>; | ||
441 | //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; | ||
442 | //def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>; | ||
443 | //def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>; | ||
444 | //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; | ||
445 | //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; | ||
446 | //def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; | ||
447 | //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; | ||
448 | //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; | ||
449 | //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; | ||
450 | //def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; | ||
451 | //def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; | ||
452 | //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; | ||
453 | //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; | ||
454 | //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; | ||
455 | //def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>; | ||
456 | //def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>; | ||
457 | //def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>; | ||
458 | //def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>; | ||
459 | //def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>; | ||
460 | //def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>; | ||
461 | //def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>; | ||
462 | //def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>; | ||
463 | //def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>; | ||
464 | //def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>; | ||
465 | //def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>; | ||
466 | //def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>; | ||
467 | //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; | ||
468 | //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; | ||
469 | //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; | ||
470 | //def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; | ||
471 | //def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; | ||
472 | //def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; | ||
473 | //def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; | ||
474 | //def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; | ||
475 | //def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; | ||
476 | //def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; | ||
477 | //def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; | ||
478 | //def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; | ||
479 | //def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; | ||
480 | //def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; | ||
481 | //def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; | ||
482 | //def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; | ||
483 | //def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; | ||
484 | //def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; | ||
485 | //def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; | ||
486 | //def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; | ||
487 | //def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; | ||
488 | //def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; | ||
489 | //def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; | ||
490 | //def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; | ||
491 | //def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; | ||
492 | //def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; | ||
493 | //def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; | ||
494 | //def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; | ||
495 | //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; | ||
496 | //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; | ||
497 | //def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; | ||
498 | //def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>; | ||
499 | //def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>; | ||
500 | //def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>; | ||
501 | //def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>; | ||
502 | //def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>; | ||
503 | //def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>; | ||
504 | //def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; | ||
505 | //def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; | ||
506 | |||
507 | let neverHasSideEffects = 1 in { | ||
508 | defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; | ||
509 | } // End neverHasSideEffects | ||
510 | defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; | ||
511 | //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; | ||
512 | //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; | ||
513 | defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", | ||
514 | [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] | ||
515 | >; | ||
516 | //defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; | ||
517 | //defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; | ||
518 | defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", | ||
519 | [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] | ||
520 | >; | ||
521 | defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; | ||
522 | ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; | ||
523 | //defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; | ||
524 | //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; | ||
525 | //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; | ||
526 | //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; | ||
527 | //defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; | ||
528 | //defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; | ||
529 | //defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; | ||
530 | //defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; | ||
531 | //defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; | ||
532 | //defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; | ||
533 | //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; | ||
534 | //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; | ||
535 | defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", | ||
536 | [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] | ||
537 | >; | ||
538 | defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; | ||
539 | defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; | ||
540 | defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", | ||
541 | [(set VReg_32:$dst, (frint AllReg_32:$src0))] | ||
542 | >; | ||
543 | defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", | ||
544 | [(set VReg_32:$dst, (ffloor AllReg_32:$src0))] | ||
545 | >; | ||
546 | defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", | ||
547 | [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] | ||
548 | >; | ||
549 | defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; | ||
550 | defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; | ||
551 | defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; | ||
552 | defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; | ||
553 | defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", | ||
554 | [(set VReg_32:$dst, (int_AMDGPU_rcp AllReg_32:$src0))] | ||
555 | >; | ||
556 | defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; | ||
557 | defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; | ||
558 | defm V_RSQ_LEGACY_F32 : VOP1_32 < | ||
559 | 0x0000002d, "V_RSQ_LEGACY_F32", | ||
560 | [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] | ||
561 | >; | ||
562 | defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; | ||
563 | defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; | ||
564 | defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; | ||
565 | defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; | ||
566 | defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; | ||
567 | defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; | ||
568 | defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; | ||
569 | defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; | ||
570 | defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; | ||
571 | defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; | ||
572 | defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>; | ||
573 | defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>; | ||
574 | defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>; | ||
575 | defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>; | ||
576 | //defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>; | ||
577 | defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>; | ||
578 | defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>; | ||
579 | //defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>; | ||
580 | defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>; | ||
581 | //def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>; | ||
582 | defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; | ||
583 | defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; | ||
584 | defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; | ||
585 | |||
586 | def V_INTERP_P1_F32 : VINTRP < | ||
587 | 0x00000000, | ||
588 | (outs VReg_32:$dst), | ||
589 | (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), | ||
590 | "V_INTERP_P1_F32", | ||
591 | []> { | ||
592 | let DisableEncoding = "$m0"; | ||
593 | } | ||
594 | |||
595 | def V_INTERP_P2_F32 : VINTRP < | ||
596 | 0x00000001, | ||
597 | (outs VReg_32:$dst), | ||
598 | (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), | ||
599 | "V_INTERP_P2_F32", | ||
600 | []> { | ||
601 | |||
602 | let Constraints = "$src0 = $dst"; | ||
603 | let DisableEncoding = "$src0,$m0"; | ||
604 | |||
605 | } | ||
606 | |||
607 | def V_INTERP_MOV_F32 : VINTRP < | ||
608 | 0x00000002, | ||
609 | (outs VReg_32:$dst), | ||
610 | (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), | ||
611 | "V_INTERP_MOV_F32", | ||
612 | []> { | ||
613 | let VSRC = 0; | ||
614 | let DisableEncoding = "$m0"; | ||
615 | } | ||
616 | |||
617 | //def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; | ||
618 | |||
619 | let isTerminator = 1 in { | ||
620 | |||
621 | def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", | ||
622 | [(IL_retflag)]> { | ||
623 | let SIMM16 = 0; | ||
624 | let isBarrier = 1; | ||
625 | let hasCtrlDep = 1; | ||
626 | } | ||
627 | |||
628 | let isBranch = 1 in { | ||
629 | def S_BRANCH : SOPP < | ||
630 | 0x00000002, (ins brtarget:$target), "S_BRANCH", | ||
631 | [] | ||
632 | >; | ||
633 | |||
634 | let DisableEncoding = "$scc" in { | ||
635 | def S_CBRANCH_SCC0 : SOPP < | ||
636 | 0x00000004, (ins brtarget:$target, SCCReg:$scc), | ||
637 | "S_CBRANCH_SCC0", [] | ||
638 | >; | ||
639 | def S_CBRANCH_SCC1 : SOPP < | ||
640 | 0x00000005, (ins brtarget:$target, SCCReg:$scc), | ||
641 | "S_CBRANCH_SCC1", | ||
642 | [] | ||
643 | >; | ||
644 | } // End DisableEncoding = "$scc" | ||
645 | |||
646 | def S_CBRANCH_VCCZ : SOPP < | ||
647 | 0x00000006, (ins brtarget:$target, VCCReg:$vcc), | ||
648 | "S_CBRANCH_VCCZ", | ||
649 | [] | ||
650 | >; | ||
651 | def S_CBRANCH_VCCNZ : SOPP < | ||
652 | 0x00000007, (ins brtarget:$target, VCCReg:$vcc), | ||
653 | "S_CBRANCH_VCCNZ", | ||
654 | [] | ||
655 | >; | ||
656 | |||
657 | let DisableEncoding = "$exec" in { | ||
658 | def S_CBRANCH_EXECZ : SOPP < | ||
659 | 0x00000008, (ins brtarget:$target, EXECReg:$exec), | ||
660 | "S_CBRANCH_EXECZ", | ||
661 | [] | ||
662 | >; | ||
663 | def S_CBRANCH_EXECNZ : SOPP < | ||
664 | 0x00000009, (ins brtarget:$target, EXECReg:$exec), | ||
665 | "S_CBRANCH_EXECNZ", | ||
666 | [] | ||
667 | >; | ||
668 | } // End DisableEncoding = "$exec" | ||
669 | |||
670 | |||
671 | } // End isBranch = 1 | ||
672 | } // End isTerminator = 1 | ||
673 | |||
674 | //def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; | ||
675 | def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", | ||
676 | [] | ||
677 | >; | ||
678 | //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; | ||
679 | //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; | ||
680 | //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; | ||
681 | //def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; | ||
682 | //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; | ||
683 | //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; | ||
684 | //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; | ||
685 | //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; | ||
686 | //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; | ||
687 | //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; | ||
688 | |||
689 | /* XXX: No VOP3 version of this instruction yet */ | ||
690 | def V_CNDMASK_B32 : VOP2 <0x00000000, (outs VReg_32:$dst), | ||
691 | (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32", | ||
692 | [(set (i32 VReg_32:$dst), | ||
693 | (select VCCReg:$vcc, VReg_32:$src1, AllReg_32:$src0))] > { | ||
694 | |||
695 | let DisableEncoding = "$vcc"; | ||
696 | } | ||
697 | |||
698 | //f32 pattern for V_CNDMASK_B32 | ||
699 | def : Pat < | ||
700 | (f32 (select VCCReg:$vcc, VReg_32:$src0, AllReg_32:$src1)), | ||
701 | (V_CNDMASK_B32 AllReg_32:$src1, VReg_32:$src0, VCCReg:$vcc) | ||
702 | >; | ||
703 | |||
704 | defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; | ||
705 | defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; | ||
706 | |||
707 | defm V_ADD_F32 : VOP2_32 < | ||
708 | 0x00000003, "V_ADD_F32", | ||
709 | [(set VReg_32:$dst, (fadd AllReg_32:$src0, VReg_32:$src1))] | ||
710 | >; | ||
711 | |||
712 | defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", | ||
713 | [(set VReg_32:$dst, (fsub AllReg_32:$src0, VReg_32:$src1))] | ||
714 | >; | ||
715 | defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; | ||
716 | defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; | ||
717 | defm V_MUL_LEGACY_F32 : VOP2_32 < | ||
718 | 0x00000007, "V_MUL_LEGACY_F32", | ||
719 | [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] | ||
720 | >; | ||
721 | |||
722 | defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", | ||
723 | [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] | ||
724 | >; | ||
725 | //defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; | ||
726 | //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; | ||
727 | //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; | ||
728 | //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; | ||
729 | defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", | ||
730 | [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))] | ||
731 | >; | ||
732 | |||
733 | defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", | ||
734 | [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] | ||
735 | >; | ||
736 | defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; | ||
737 | defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; | ||
738 | defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; | ||
739 | defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; | ||
740 | defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; | ||
741 | defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; | ||
742 | defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; | ||
743 | defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; | ||
744 | defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; | ||
745 | defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; | ||
746 | defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; | ||
747 | defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; | ||
748 | defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", | ||
749 | [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] | ||
750 | >; | ||
751 | defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", | ||
752 | [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] | ||
753 | >; | ||
754 | defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", | ||
755 | [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] | ||
756 | >; | ||
757 | defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; | ||
758 | defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; | ||
759 | defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; | ||
760 | defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; | ||
761 | //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; | ||
762 | //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; | ||
763 | //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; | ||
764 | let Defs = [VCC] in { // Carry-out goes to VCC | ||
765 | defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", | ||
766 | [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] | ||
767 | >; | ||
768 | defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", | ||
769 | [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] | ||
770 | >; | ||
771 | } // End Defs = [VCC] | ||
772 | defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; | ||
773 | defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; | ||
774 | defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>; | ||
775 | defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>; | ||
776 | defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; | ||
777 | ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; | ||
778 | ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; | ||
779 | ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; | ||
780 | defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", | ||
781 | [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] | ||
782 | >; | ||
783 | ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; | ||
784 | ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; | ||
785 | def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; | ||
786 | def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>; | ||
787 | def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>; | ||
788 | def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>; | ||
789 | def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>; | ||
790 | def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>; | ||
791 | def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>; | ||
792 | def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>; | ||
793 | def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>; | ||
794 | def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>; | ||
795 | def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>; | ||
796 | def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>; | ||
797 | ////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; | ||
798 | ////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; | ||
799 | ////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; | ||
800 | ////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; | ||
801 | //def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; | ||
802 | |||
803 | let neverHasSideEffects = 1 in { | ||
804 | |||
805 | def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; | ||
806 | def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; | ||
807 | //def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>; | ||
808 | //def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>; | ||
809 | |||
810 | } // End neverHasSideEffects | ||
811 | def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; | ||
812 | def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; | ||
813 | def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; | ||
814 | def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; | ||
815 | def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; | ||
816 | def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; | ||
817 | def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; | ||
818 | def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; | ||
819 | def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; | ||
820 | //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; | ||
821 | def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; | ||
822 | def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; | ||
823 | def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; | ||
824 | ////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; | ||
825 | ////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; | ||
826 | ////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; | ||
827 | ////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; | ||
828 | ////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; | ||
829 | ////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; | ||
830 | ////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; | ||
831 | ////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; | ||
832 | ////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; | ||
833 | //def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; | ||
834 | //def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; | ||
835 | //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; | ||
836 | def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; | ||
837 | ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; | ||
838 | def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; | ||
839 | def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; | ||
840 | def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>; | ||
841 | def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>; | ||
842 | def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>; | ||
843 | def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; | ||
844 | def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; | ||
845 | def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; | ||
846 | def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; | ||
847 | def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; | ||
848 | def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; | ||
849 | def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; | ||
850 | def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; | ||
851 | def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; | ||
852 | def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; | ||
853 | def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; | ||
854 | def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; | ||
855 | def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; | ||
856 | //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; | ||
857 | //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; | ||
858 | //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; | ||
859 | def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; | ||
860 | def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; | ||
861 | def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; | ||
862 | def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; | ||
863 | def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; | ||
864 | def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; | ||
865 | def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; | ||
866 | def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; | ||
867 | def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; | ||
868 | def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; | ||
869 | def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; | ||
870 | |||
871 | def S_CSELECT_B32 : SOP2 < | ||
872 | 0x0000000a, (outs SReg_32:$dst), | ||
873 | (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", | ||
874 | [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] | ||
875 | >; | ||
876 | |||
877 | def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; | ||
878 | |||
879 | // f32 pattern for S_CSELECT_B32 | ||
880 | def : Pat < | ||
881 | (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), | ||
882 | (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) | ||
883 | >; | ||
884 | |||
885 | def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; | ||
886 | |||
887 | def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", | ||
888 | [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] | ||
889 | >; | ||
890 | def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", | ||
891 | [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] | ||
892 | >; | ||
893 | def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; | ||
894 | def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; | ||
895 | def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; | ||
896 | def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; | ||
897 | ////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>; | ||
898 | ////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>; | ||
899 | ////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>; | ||
900 | ////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>; | ||
901 | def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; | ||
902 | def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; | ||
903 | def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; | ||
904 | def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; | ||
905 | def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; | ||
906 | def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; | ||
907 | def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>; | ||
908 | def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>; | ||
909 | def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>; | ||
910 | def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>; | ||
911 | def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>; | ||
912 | def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>; | ||
913 | def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; | ||
914 | def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; | ||
915 | def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; | ||
916 | def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>; | ||
917 | def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>; | ||
918 | def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>; | ||
919 | def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; | ||
920 | //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; | ||
921 | def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; | ||
922 | |||
923 | class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI < | ||
924 | (outs VReg_32:$dst), | ||
925 | (ins immType:$src0), | ||
926 | "V_MOV_IMM", | ||
927 | [(set VReg_32:$dst, (immNode:$src0))] | ||
928 | >; | ||
929 | |||
930 | let isCodeGenOnly = 1, isPseudo = 1 in { | ||
931 | |||
932 | def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>; | ||
933 | def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>; | ||
934 | |||
935 | def S_MOV_IMM_I32 : InstSI < | ||
936 | (outs SReg_32:$dst), | ||
937 | (ins i32imm:$src0), | ||
938 | "S_MOV_IMM_I32", | ||
939 | [(set SReg_32:$dst, (imm:$src0))] | ||
940 | >; | ||
941 | |||
942 | // i64 immediates aren't really supported in hardware, but LLVM will use the i64 | ||
943 | // type for indices on load and store instructions. The pattern for | ||
944 | // S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, | ||
945 | // which the hardware can handle. | ||
946 | def S_MOV_IMM_I64 : InstSI < | ||
947 | (outs SReg_64:$dst), | ||
948 | (ins i64imm:$src0), | ||
949 | "S_MOV_IMM_I64 $dst, $src0", | ||
950 | [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] | ||
951 | >; | ||
952 | |||
953 | } // End isCodeGenOnly, isPseudo = 1 | ||
954 | |||
955 | class SI_LOAD_LITERAL<Operand ImmType> : | ||
956 | Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { | ||
957 | |||
958 | bits<32> imm; | ||
959 | let Inst{31-0} = imm; | ||
960 | } | ||
961 | |||
962 | def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>; | ||
963 | def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>; | ||
964 | |||
965 | let isCodeGenOnly = 1, isPseudo = 1 in { | ||
966 | |||
967 | def SET_M0 : InstSI < | ||
968 | (outs SReg_32:$dst), | ||
969 | (ins i32imm:$src0), | ||
970 | "SET_M0", | ||
971 | [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] | ||
972 | >; | ||
973 | |||
974 | def CONFIG_WRITE : InstSI < | ||
975 | (outs i32imm:$reg), | ||
976 | (ins i32imm:$val), | ||
977 | "CONFIG_WRITE $reg, $val", | ||
978 | [] > { | ||
979 | field bits<32> Inst = 0; | ||
980 | } | ||
981 | |||
982 | def LOAD_CONST : AMDGPUShaderInst < | ||
983 | (outs GPRF32:$dst), | ||
984 | (ins i32imm:$src), | ||
985 | "LOAD_CONST $dst, $src", | ||
986 | [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] | ||
987 | >; | ||
988 | |||
989 | let usesCustomInserter = 1 in { | ||
990 | |||
991 | def SI_V_CNDLT : InstSI < | ||
992 | (outs VReg_32:$dst), | ||
993 | (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), | ||
994 | "SI_V_CNDLT $dst, $src0, $src1, $src2", | ||
995 | [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))] | ||
996 | >; | ||
997 | |||
998 | def SI_INTERP : InstSI < | ||
999 | (outs VReg_32:$dst), | ||
1000 | (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), | ||
1001 | "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params", | ||
1002 | [] | ||
1003 | >; | ||
1004 | |||
1005 | def SI_INTERP_CONST : InstSI < | ||
1006 | (outs VReg_32:$dst), | ||
1007 | (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), | ||
1008 | "SI_INTERP_CONST $dst, $attr_chan, $attr, $params", | ||
1009 | [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan, | ||
1010 | imm:$attr, SReg_32:$params))] | ||
1011 | >; | ||
1012 | |||
1013 | def SI_KIL : InstSI < | ||
1014 | (outs), | ||
1015 | (ins VReg_32:$src), | ||
1016 | "SI_KIL $src", | ||
1017 | [(int_AMDGPU_kill VReg_32:$src)] | ||
1018 | >; | ||
1019 | |||
1020 | def SI_WQM : InstSI < | ||
1021 | (outs), | ||
1022 | (ins), | ||
1023 | "SI_WQM", | ||
1024 | [(int_SI_wqm)] | ||
1025 | >; | ||
1026 | |||
1027 | } // end usesCustomInserter | ||
1028 | |||
1029 | // SI Psuedo branch instructions. These are used by the CFG structurizer pass | ||
1030 | // and should be lowered to ISA instructions prior to codegen. | ||
1031 | |||
1032 | let isBranch = 1, isTerminator = 1 in { | ||
1033 | def SI_IF_NZ : InstSI < | ||
1034 | (outs), | ||
1035 | (ins brtarget:$target, VCCReg:$vcc), | ||
1036 | "SI_BRANCH_NZ", | ||
1037 | [(IL_brcond bb:$target, VCCReg:$vcc)] | ||
1038 | >; | ||
1039 | |||
1040 | def SI_IF_Z : InstSI < | ||
1041 | (outs), | ||
1042 | (ins brtarget:$target, VCCReg:$vcc), | ||
1043 | "SI_BRANCH_Z", | ||
1044 | [] | ||
1045 | >; | ||
1046 | } // end isBranch = 1, isTerminator = 1 | ||
1047 | } // end IsCodeGenOnly, isPseudo | ||
1048 | |||
1049 | /* int_SI_vs_load_input */ | ||
1050 | def : Pat< | ||
1051 | (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, | ||
1052 | VReg_32:$buf_idx_vgpr), | ||
1053 | (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, | ||
1054 | VReg_32:$buf_idx_vgpr, SReg_128:$tlst, | ||
1055 | 0, 0, (i32 SREG_LIT_0)) | ||
1056 | >; | ||
1057 | |||
1058 | /* int_SI_export */ | ||
1059 | def : Pat < | ||
1060 | (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, | ||
1061 | VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), | ||
1062 | (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, | ||
1063 | VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) | ||
1064 | >; | ||
1065 | |||
1066 | /* int_SI_sample */ | ||
1067 | def : Pat < | ||
1068 | (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), | ||
1069 | (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, | ||
1070 | SReg_256:$rsrc, SReg_128:$sampler) | ||
1071 | >; | ||
1072 | |||
1073 | def CLAMP_SI : CLAMP<VReg_32>; | ||
1074 | def FABS_SI : FABS<VReg_32>; | ||
1075 | def FNEG_SI : FNEG<VReg_32>; | ||
1076 | |||
1077 | def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; | ||
1078 | def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; | ||
1079 | def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; | ||
1080 | def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; | ||
1081 | |||
1082 | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; | ||
1083 | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; | ||
1084 | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; | ||
1085 | def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>; | ||
1086 | |||
1087 | def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; | ||
1088 | def : Vector_Build <v4i32, SReg_128, i32, SReg_32>; | ||
1089 | |||
1090 | def : BitConvert <i32, f32, SReg_32>; | ||
1091 | def : BitConvert <i32, f32, VReg_32>; | ||
1092 | |||
1093 | def : BitConvert <f32, i32, SReg_32>; | ||
1094 | def : BitConvert <f32, i32, VReg_32>; | ||
1095 | |||
1096 | def : Pat < | ||
1097 | (i64 (SIvcc_bitcast VCCReg:$vcc)), | ||
1098 | (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) | ||
1099 | >; | ||
1100 | |||
1101 | def : Pat < | ||
1102 | (i1 (SIvcc_bitcast SReg_64:$vcc)), | ||
1103 | (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) | ||
1104 | >; | ||
1105 | |||
1106 | /********** ===================== **********/ | ||
1107 | /********** Interpolation Paterns **********/ | ||
1108 | /********** ===================== **********/ | ||
1109 | |||
1110 | def : Pat < | ||
1111 | (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), | ||
1112 | (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, | ||
1113 | imm:$attr, SReg_32:$params) | ||
1114 | >; | ||
1115 | |||
1116 | def : Pat < | ||
1117 | (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), | ||
1118 | (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan, | ||
1119 | imm:$attr, SReg_32:$params) | ||
1120 | >; | ||
1121 | |||
1122 | def : Pat < | ||
1123 | (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params), | ||
1124 | (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan, | ||
1125 | imm:$attr, SReg_32:$params) | ||
1126 | >; | ||
1127 | |||
1128 | def : Pat < | ||
1129 | (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), | ||
1130 | (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan, | ||
1131 | imm:$attr, SReg_32:$params) | ||
1132 | >; | ||
1133 | |||
1134 | def : Pat < | ||
1135 | (int_SI_fs_read_face), | ||
1136 | (f32 FRONT_FACE) | ||
1137 | >; | ||
1138 | |||
1139 | def : Pat < | ||
1140 | (int_SI_fs_read_pos 0), | ||
1141 | (f32 POS_X_FLOAT) | ||
1142 | >; | ||
1143 | |||
1144 | def : Pat < | ||
1145 | (int_SI_fs_read_pos 1), | ||
1146 | (f32 POS_Y_FLOAT) | ||
1147 | >; | ||
1148 | |||
1149 | def : Pat < | ||
1150 | (int_SI_fs_read_pos 2), | ||
1151 | (f32 POS_Z_FLOAT) | ||
1152 | >; | ||
1153 | |||
1154 | def : Pat < | ||
1155 | (int_SI_fs_read_pos 3), | ||
1156 | (f32 POS_W_FLOAT) | ||
1157 | >; | ||
1158 | |||
1159 | /********** ================== **********/ | ||
1160 | /********** Intrinsic Patterns **********/ | ||
1161 | /********** ================== **********/ | ||
1162 | |||
1163 | /* llvm.AMDGPU.pow */ | ||
1164 | /* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */ | ||
1165 | def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; | ||
1166 | |||
1167 | def : Pat < | ||
1168 | (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1), | ||
1169 | (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1)) | ||
1170 | >; | ||
1171 | |||
1172 | /********** ================== **********/ | ||
1173 | /********** VOP3 Patterns **********/ | ||
1174 | /********** ================== **********/ | ||
1175 | |||
1176 | def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)), | ||
1177 | (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, | ||
1178 | 0, 0, 0, 0)>; | ||
1179 | |||
1180 | } // End isSI predicate | ||
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td deleted file mode 100644 index b3ecba3e2d0..00000000000 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | //===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // SI Intrinsic Definitions | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | let TargetPrefix = "SI", isTarget = 1 in { | ||
16 | |||
17 | def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; | ||
18 | def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; | ||
19 | /* XXX: We may need a seperate intrinsic here for loading integer values */ | ||
20 | def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; | ||
21 | def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; | ||
22 | def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; | ||
23 | def int_SI_wqm : Intrinsic <[], [], []>; | ||
24 | |||
25 | def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>; | ||
26 | |||
27 | /* Interpolation Intrinsics */ | ||
28 | |||
29 | def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>; | ||
30 | class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; | ||
31 | |||
32 | def int_SI_fs_interp_linear_center : Interp; | ||
33 | def int_SI_fs_interp_linear_centroid : Interp; | ||
34 | def int_SI_fs_interp_persp_center : Interp; | ||
35 | def int_SI_fs_interp_persp_centroid : Interp; | ||
36 | def int_SI_fs_interp_constant : Interp; | ||
37 | |||
38 | def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>; | ||
39 | def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; | ||
40 | } | ||
diff --git a/src/gallium/drivers/radeon/SILowerFlowControl.cpp b/src/gallium/drivers/radeon/SILowerFlowControl.cpp deleted file mode 100644 index bf5192efe3d..00000000000 --- a/src/gallium/drivers/radeon/SILowerFlowControl.cpp +++ /dev/null | |||
@@ -1,161 +0,0 @@ | |||
1 | //===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF) | ||
11 | // to predicated instructions. | ||
12 | // | ||
13 | // All flow control (except loops) is handled using predicated instructions and | ||
14 | // a predicate stack. Each Scalar ALU controls the operations of 64 Vector | ||
15 | // ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs | ||
16 | // by writting to the 64-bit EXEC register (each bit corresponds to a | ||
17 | // single vector ALU). Typically, for predicates, a vector ALU will write | ||
18 | // to its bit of the VCC register (like EXEC VCC is 64-bits, one for each | ||
19 | // Vector ALU) and then the ScalarALU will AND the VCC register with the | ||
20 | // EXEC to update the predicates. | ||
21 | // | ||
22 | // For example: | ||
23 | // %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 | ||
24 | // SI_IF_NZ %VCC | ||
25 | // %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 | ||
26 | // ELSE | ||
27 | // %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 | ||
28 | // ENDIF | ||
29 | // | ||
30 | // becomes: | ||
31 | // | ||
32 | // %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask | ||
33 | // %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask | ||
34 | // S_CBRANCH_EXECZ label0 // This instruction is an | ||
35 | // // optimization which allows us to | ||
36 | // // branch if all the bits of | ||
37 | // // EXEC are zero. | ||
38 | // %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch | ||
39 | // | ||
40 | // label0: | ||
41 | // %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the | ||
42 | // // Then block. | ||
43 | // %EXEC = S_AND_B64 %SGPR0, %EXEC | ||
44 | // S_BRANCH_EXECZ label1 // Use our branch optimization | ||
45 | // // instruction again. | ||
46 | // %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block | ||
47 | // label1: | ||
48 | // S_MOV_B64 // Restore the old EXEC value | ||
49 | //===----------------------------------------------------------------------===// | ||
50 | |||
51 | #include "AMDGPU.h" | ||
52 | #include "SIInstrInfo.h" | ||
53 | #include "llvm/CodeGen/MachineFunction.h" | ||
54 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
55 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
56 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
57 | |||
58 | using namespace llvm; | ||
59 | |||
60 | namespace { | ||
61 | |||
62 | class SILowerFlowControlPass : public MachineFunctionPass { | ||
63 | |||
64 | private: | ||
65 | static char ID; | ||
66 | const TargetInstrInfo *TII; | ||
67 | std::vector<unsigned> PredicateStack; | ||
68 | std::vector<unsigned> UnusedRegisters; | ||
69 | |||
70 | void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); | ||
71 | void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); | ||
72 | |||
73 | public: | ||
74 | SILowerFlowControlPass(TargetMachine &tm) : | ||
75 | MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } | ||
76 | |||
77 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
78 | |||
79 | const char *getPassName() const { | ||
80 | return "SI Lower flow control instructions"; | ||
81 | } | ||
82 | |||
83 | }; | ||
84 | |||
85 | } // End anonymous namespace | ||
86 | |||
87 | char SILowerFlowControlPass::ID = 0; | ||
88 | |||
89 | FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) { | ||
90 | return new SILowerFlowControlPass(tm); | ||
91 | } | ||
92 | |||
93 | bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) { | ||
94 | |||
95 | // Find all the unused registers that can be used for the predicate stack. | ||
96 | for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(), | ||
97 | I = AMDGPU::SReg_64RegClass.end(); | ||
98 | I != S; --I) { | ||
99 | unsigned Reg = *I; | ||
100 | if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { | ||
101 | UnusedRegisters.push_back(Reg); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||
106 | BB != BB_E; ++BB) { | ||
107 | MachineBasicBlock &MBB = *BB; | ||
108 | for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); | ||
109 | I != MBB.end(); I = Next, Next = llvm::next(I)) { | ||
110 | MachineInstr &MI = *I; | ||
111 | switch (MI.getOpcode()) { | ||
112 | default: break; | ||
113 | case AMDGPU::SI_IF_NZ: | ||
114 | pushExecMask(MBB, I); | ||
115 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), | ||
116 | AMDGPU::EXEC) | ||
117 | .addOperand(MI.getOperand(0)) // VCC | ||
118 | .addReg(AMDGPU::EXEC); | ||
119 | MI.eraseFromParent(); | ||
120 | break; | ||
121 | case AMDGPU::ELSE: | ||
122 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64), | ||
123 | AMDGPU::EXEC) | ||
124 | .addReg(AMDGPU::EXEC); | ||
125 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), | ||
126 | AMDGPU::EXEC) | ||
127 | .addReg(PredicateStack.back()) | ||
128 | .addReg(AMDGPU::EXEC); | ||
129 | MI.eraseFromParent(); | ||
130 | break; | ||
131 | case AMDGPU::ENDIF: | ||
132 | popExecMask(MBB, I); | ||
133 | MI.eraseFromParent(); | ||
134 | break; | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | return false; | ||
139 | } | ||
140 | |||
141 | void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB, | ||
142 | MachineBasicBlock::iterator I) { | ||
143 | |||
144 | assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack"); | ||
145 | unsigned StackReg = UnusedRegisters.back(); | ||
146 | UnusedRegisters.pop_back(); | ||
147 | PredicateStack.push_back(StackReg); | ||
148 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), | ||
149 | StackReg) | ||
150 | .addReg(AMDGPU::EXEC); | ||
151 | } | ||
152 | |||
153 | void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB, | ||
154 | MachineBasicBlock::iterator I) { | ||
155 | unsigned StackReg = PredicateStack.back(); | ||
156 | PredicateStack.pop_back(); | ||
157 | UnusedRegisters.push_back(StackReg); | ||
158 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), | ||
159 | AMDGPU::EXEC) | ||
160 | .addReg(StackReg); | ||
161 | } | ||
diff --git a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp b/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp deleted file mode 100644 index 720245091f7..00000000000 --- a/src/gallium/drivers/radeon/SILowerLiteralConstants.cpp +++ /dev/null | |||
@@ -1,105 +0,0 @@ | |||
1 | //===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // This pass performs the following transformation on instructions with | ||
10 | // literal constants: | ||
11 | // | ||
12 | // %VGPR0 = V_MOV_IMM_I32 1 | ||
13 | // | ||
14 | // becomes: | ||
15 | // | ||
16 | // BUNDLE | ||
17 | // * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT | ||
18 | // * SI_LOAD_LITERAL 1 | ||
19 | // | ||
20 | // The resulting sequence matches exactly how the hardware handles immediate | ||
21 | // operands, so this transformation greatly simplifies the code generator. | ||
22 | // | ||
23 | // Only the *_MOV_IMM_* support immediate operands at the moment, but when | ||
24 | // support for immediate operands is added to other instructions, they | ||
25 | // will be lowered here as well. | ||
26 | //===----------------------------------------------------------------------===// | ||
27 | |||
28 | #include "AMDGPU.h" | ||
29 | #include "llvm/CodeGen/MachineFunction.h" | ||
30 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||
31 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
32 | #include "llvm/CodeGen/MachineInstrBundle.h" | ||
33 | |||
34 | using namespace llvm; | ||
35 | |||
36 | namespace { | ||
37 | |||
38 | class SILowerLiteralConstantsPass : public MachineFunctionPass { | ||
39 | |||
40 | private: | ||
41 | static char ID; | ||
42 | const TargetInstrInfo *TII; | ||
43 | |||
44 | public: | ||
45 | SILowerLiteralConstantsPass(TargetMachine &tm) : | ||
46 | MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } | ||
47 | |||
48 | virtual bool runOnMachineFunction(MachineFunction &MF); | ||
49 | |||
50 | const char *getPassName() const { | ||
51 | return "SI Lower literal constants pass"; | ||
52 | } | ||
53 | }; | ||
54 | |||
55 | } // End anonymous namespace | ||
56 | |||
57 | char SILowerLiteralConstantsPass::ID = 0; | ||
58 | |||
59 | FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { | ||
60 | return new SILowerLiteralConstantsPass(tm); | ||
61 | } | ||
62 | |||
63 | bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { | ||
64 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||
65 | BB != BB_E; ++BB) { | ||
66 | MachineBasicBlock &MBB = *BB; | ||
67 | for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); | ||
68 | I != MBB.end(); I = Next, Next = llvm::next(I)) { | ||
69 | MachineInstr &MI = *I; | ||
70 | switch (MI.getOpcode()) { | ||
71 | default: break; | ||
72 | case AMDGPU::S_MOV_IMM_I32: | ||
73 | case AMDGPU::S_MOV_IMM_I64: | ||
74 | case AMDGPU::V_MOV_IMM_F32: | ||
75 | case AMDGPU::V_MOV_IMM_I32: { | ||
76 | unsigned MovOpcode; | ||
77 | unsigned LoadLiteralOpcode; | ||
78 | MachineOperand LiteralOp = MI.getOperand(1); | ||
79 | if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { | ||
80 | MovOpcode = AMDGPU::V_MOV_B32_e32; | ||
81 | } else { | ||
82 | MovOpcode = AMDGPU::S_MOV_B32; | ||
83 | } | ||
84 | if (LiteralOp.isImm()) { | ||
85 | LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; | ||
86 | } else { | ||
87 | LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; | ||
88 | } | ||
89 | MachineInstr *First = | ||
90 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode), | ||
91 | MI.getOperand(0).getReg()) | ||
92 | .addReg(AMDGPU::SI_LITERAL_CONSTANT); | ||
93 | MachineInstr *Last = | ||
94 | BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode)) | ||
95 | .addOperand(MI.getOperand(1)); | ||
96 | Last->setIsInsideBundle(); | ||
97 | llvm::finalizeBundle(MBB, First, Last); | ||
98 | MI.eraseFromParent(); | ||
99 | break; | ||
100 | } | ||
101 | } | ||
102 | } | ||
103 | } | ||
104 | return false; | ||
105 | } | ||
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp deleted file mode 100644 index 3cc1cd67c47..00000000000 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp +++ /dev/null | |||
@@ -1,19 +0,0 @@ | |||
1 | //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | |||
10 | |||
11 | #include "SIMachineFunctionInfo.h" | ||
12 | |||
13 | using namespace llvm; | ||
14 | |||
15 | SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) | ||
16 | : MachineFunctionInfo(), | ||
17 | SPIPSInputAddr(0), | ||
18 | ShaderType(0) | ||
19 | { } | ||
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h deleted file mode 100644 index 68097b44c52..00000000000 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config | ||
11 | // register, which is to tell the hardware which interpolation parameters to | ||
12 | // load. | ||
13 | // | ||
14 | //===----------------------------------------------------------------------===// | ||
15 | |||
16 | |||
17 | #ifndef _SIMACHINEFUNCTIONINFO_H_ | ||
18 | #define _SIMACHINEFUNCTIONINFO_H_ | ||
19 | |||
20 | #include "llvm/CodeGen/MachineFunction.h" | ||
21 | |||
22 | namespace llvm { | ||
23 | |||
24 | class SIMachineFunctionInfo : public MachineFunctionInfo { | ||
25 | |||
26 | private: | ||
27 | |||
28 | public: | ||
29 | SIMachineFunctionInfo(const MachineFunction &MF); | ||
30 | unsigned SPIPSInputAddr; | ||
31 | unsigned ShaderType; | ||
32 | |||
33 | }; | ||
34 | |||
35 | } // End namespace llvm | ||
36 | |||
37 | |||
38 | #endif //_SIMACHINEFUNCTIONINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp deleted file mode 100644 index 8fe8ae584e2..00000000000 --- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp +++ /dev/null | |||
@@ -1,60 +0,0 @@ | |||
1 | //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // This file contains the SI implementation of the TargetRegisterInfo class. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | #include "SIRegisterInfo.h" | ||
16 | #include "AMDGPUTargetMachine.h" | ||
17 | |||
18 | using namespace llvm; | ||
19 | |||
20 | SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm, | ||
21 | const TargetInstrInfo &tii) | ||
22 | : AMDGPURegisterInfo(tm, tii), | ||
23 | TM(tm), | ||
24 | TII(tii) | ||
25 | { } | ||
26 | |||
27 | BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const | ||
28 | { | ||
29 | BitVector Reserved(getNumRegs()); | ||
30 | return Reserved; | ||
31 | } | ||
32 | |||
33 | unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const | ||
34 | { | ||
35 | switch (reg) { | ||
36 | case AMDGPU::M0: return 124; | ||
37 | case AMDGPU::SREG_LIT_0: return 128; | ||
38 | default: return getHWRegNum(reg); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | const TargetRegisterClass * | ||
43 | SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const | ||
44 | { | ||
45 | switch (rc->getID()) { | ||
46 | case AMDGPU::GPRF32RegClassID: | ||
47 | return &AMDGPU::VReg_32RegClass; | ||
48 | default: return rc; | ||
49 | } | ||
50 | } | ||
51 | |||
52 | const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( | ||
53 | MVT VT) const | ||
54 | { | ||
55 | switch(VT.SimpleTy) { | ||
56 | default: | ||
57 | case MVT::i32: return &AMDGPU::VReg_32RegClass; | ||
58 | } | ||
59 | } | ||
60 | #include "SIRegisterGetHWRegNum.inc" | ||
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h deleted file mode 100644 index 99005cbccc1..00000000000 --- a/src/gallium/drivers/radeon/SIRegisterInfo.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // Interface definition for SIRegisterInfo | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | #ifndef SIREGISTERINFO_H_ | ||
16 | #define SIREGISTERINFO_H_ | ||
17 | |||
18 | #include "AMDGPURegisterInfo.h" | ||
19 | |||
20 | namespace llvm { | ||
21 | |||
22 | class AMDGPUTargetMachine; | ||
23 | class TargetInstrInfo; | ||
24 | |||
25 | struct SIRegisterInfo : public AMDGPURegisterInfo | ||
26 | { | ||
27 | AMDGPUTargetMachine &TM; | ||
28 | const TargetInstrInfo &TII; | ||
29 | |||
30 | SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); | ||
31 | |||
32 | virtual BitVector getReservedRegs(const MachineFunction &MF) const; | ||
33 | |||
34 | /// getBinaryCode - Returns the hardware encoding for a register | ||
35 | virtual unsigned getBinaryCode(unsigned reg) const; | ||
36 | |||
37 | /// getISARegClass - rc is an AMDIL reg class. This function returns the | ||
38 | /// SI register class that is equivalent to the given AMDIL register class. | ||
39 | virtual const TargetRegisterClass * | ||
40 | getISARegClass(const TargetRegisterClass * rc) const; | ||
41 | |||
42 | /// getHWRegNum - Generated function that returns the hardware encoding for | ||
43 | /// a register | ||
44 | unsigned getHWRegNum(unsigned reg) const; | ||
45 | |||
46 | /// getCFGStructurizerRegClass - get the register class of the specified | ||
47 | /// type to use in the CFGStructurizer | ||
48 | virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; | ||
49 | |||
50 | }; | ||
51 | |||
52 | } // End namespace llvm | ||
53 | |||
54 | #endif // SIREGISTERINFO_H_ | ||
diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td deleted file mode 100644 index 28b65b82585..00000000000 --- a/src/gallium/drivers/radeon/SISchedule.td +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | //===-- SISchedule.td - SI Scheduling definitons -------------------------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // TODO: This is just a place holder for now. | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | |||
15 | def SI_Itin : ProcessorItineraries <[], [], []>; | ||
diff --git a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp b/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp deleted file mode 100644 index 380e7deb280..00000000000 --- a/src/gallium/drivers/radeon/TargetInfo/AMDGPUTargetInfo.cpp +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | //===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===// | ||
2 | // | ||
3 | // The LLVM Compiler Infrastructure | ||
4 | // | ||
5 | // This file is distributed under the University of Illinois Open Source | ||
6 | // License. See LICENSE.TXT for details. | ||
7 | // | ||
8 | //===----------------------------------------------------------------------===// | ||
9 | // | ||
10 | // TODO: Add full description | ||
11 | // | ||
12 | //===----------------------------------------------------------------------===// | ||
13 | |||
14 | #include "AMDGPU.h" | ||
15 | #include "llvm/Support/TargetRegistry.h" | ||
16 | |||
17 | using namespace llvm; | ||
18 | |||
19 | /// The target for the AMDGPU backend | ||
20 | Target llvm::TheAMDGPUTarget; | ||
21 | |||
22 | /// Extern function to initialize the targets for the AMDGPU backend | ||
23 | extern "C" void LLVMInitializeAMDGPUTargetInfo() { | ||
24 | RegisterTarget<Triple::r600, false> | ||
25 | R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX"); | ||
26 | } | ||
diff --git a/src/gallium/drivers/radeon/loader.cpp b/src/gallium/drivers/radeon/loader.cpp deleted file mode 100644 index 3ea8cd8900e..00000000000 --- a/src/gallium/drivers/radeon/loader.cpp +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | |||
2 | #include "radeon_llvm_emit.h" | ||
3 | |||
4 | #include <llvm/Support/CommandLine.h> | ||
5 | #include <llvm/Support/IRReader.h> | ||
6 | #include <llvm/Support/SourceMgr.h> | ||
7 | #include <llvm/LLVMContext.h> | ||
8 | #include <llvm/Module.h> | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #include <llvm-c/Core.h> | ||
12 | |||
13 | using namespace llvm; | ||
14 | |||
15 | static cl::opt<std::string> | ||
16 | InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); | ||
17 | |||
18 | static cl::opt<std::string> | ||
19 | TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name")); | ||
20 | |||
21 | int main(int argc, char ** argv) | ||
22 | { | ||
23 | unsigned char * bytes; | ||
24 | unsigned byte_count; | ||
25 | |||
26 | std::auto_ptr<Module> M; | ||
27 | LLVMContext &Context = getGlobalContext(); | ||
28 | SMDiagnostic Err; | ||
29 | cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n"); | ||
30 | M.reset(ParseIRFile(InputFilename, Err, Context)); | ||
31 | |||
32 | Module * mod = M.get(); | ||
33 | |||
34 | radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1); | ||
35 | } | ||
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp index b889f029694..903e1028a09 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp | |||
@@ -39,12 +39,7 @@ | |||
39 | #include <llvm/Target/TargetMachine.h> | 39 | #include <llvm/Target/TargetMachine.h> |
40 | #include <llvm/Transforms/Scalar.h> | 40 | #include <llvm/Transforms/Scalar.h> |
41 | #include <llvm-c/Target.h> | 41 | #include <llvm-c/Target.h> |
42 | |||
43 | #if HAVE_LLVM < 0x0302 | ||
44 | #include <llvm/Target/TargetData.h> | ||
45 | #else | ||
46 | #include <llvm/DataLayout.h> | 42 | #include <llvm/DataLayout.h> |
47 | #endif | ||
48 | 43 | ||
49 | #include <iostream> | 44 | #include <iostream> |
50 | #include <stdlib.h> | 45 | #include <stdlib.h> |
@@ -52,16 +47,6 @@ | |||
52 | 47 | ||
53 | using namespace llvm; | 48 | using namespace llvm; |
54 | 49 | ||
55 | #ifndef EXTERNAL_LLVM | ||
56 | extern "C" { | ||
57 | |||
58 | void LLVMInitializeAMDGPUAsmPrinter(void); | ||
59 | void LLVMInitializeAMDGPUTargetMC(void); | ||
60 | void LLVMInitializeAMDGPUTarget(void); | ||
61 | void LLVMInitializeAMDGPUTargetInfo(void); | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | namespace { | 50 | namespace { |
66 | 51 | ||
67 | class LLVMEnsureMultithreaded { | 52 | class LLVMEnsureMultithreaded { |
@@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, | |||
89 | 74 | ||
90 | Triple AMDGPUTriple(sys::getDefaultTargetTriple()); | 75 | Triple AMDGPUTriple(sys::getDefaultTargetTriple()); |
91 | 76 | ||
92 | #if HAVE_LLVM == 0x0302 | ||
93 | LLVMInitializeAMDGPUTargetInfo(); | ||
94 | LLVMInitializeAMDGPUTarget(); | ||
95 | LLVMInitializeAMDGPUTargetMC(); | ||
96 | LLVMInitializeAMDGPUAsmPrinter(); | ||
97 | #else | ||
98 | LLVMInitializeR600TargetInfo(); | 77 | LLVMInitializeR600TargetInfo(); |
99 | LLVMInitializeR600Target(); | 78 | LLVMInitializeR600Target(); |
100 | LLVMInitializeR600TargetMC(); | 79 | LLVMInitializeR600TargetMC(); |
101 | LLVMInitializeR600AsmPrinter(); | 80 | LLVMInitializeR600AsmPrinter(); |
102 | #endif | ||
103 | 81 | ||
104 | std::string err; | 82 | std::string err; |
105 | const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err); | 83 | const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err); |
@@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, | |||
130 | )); | 108 | )); |
131 | TargetMachine &AMDGPUTargetMachine = *tm.get(); | 109 | TargetMachine &AMDGPUTargetMachine = *tm.get(); |
132 | PassManager PM; | 110 | PassManager PM; |
133 | #if HAVE_LLVM < 0x0302 | ||
134 | PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData())); | ||
135 | #else | ||
136 | PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout())); | 111 | PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout())); |
137 | #endif | ||
138 | PM.add(createPromoteMemoryToRegisterPass()); | 112 | PM.add(createPromoteMemoryToRegisterPass()); |
139 | AMDGPUTargetMachine.setAsmVerbosityDefault(true); | 113 | AMDGPUTargetMachine.setAsmVerbosityDefault(true); |
140 | 114 | ||